vp9_rdopt.c 153 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
12
13
14
15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
50
51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
  {NEARESTMV, LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
57
58
59
60
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
  {NEWMV,     LAST_FRAME,   NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
61
  {NEARMV,    LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
62
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
63

Yaowu Xu's avatar
Yaowu Xu committed
64
65
  {DC_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
66
67
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
68
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
69

70
71
  {TM_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
72
73
74
75
76
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
77

78
79
80
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
Paul Wilkins's avatar
Paul Wilkins committed
81
82
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
83

Paul Wilkins's avatar
Paul Wilkins committed
84
85
86
  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
Ronald S. Bultje's avatar
Ronald S. Bultje committed
87
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
88
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
89

Paul Wilkins's avatar
Paul Wilkins committed
90
91
92
93
94
95
96
97
98
  {I4X4_PRED, INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
99
100
};

101
102
103
104
105
106
107
108
109
110
111
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size; it is indexed by block size
// type and has one entry per BLOCK_SIZE_TYPES value.
// The factors are stored scaled by 4 (i.e. << 2): 2 = x0.5, 32 = x8, etc.
// The table is only ever read, so it is declared const.
static const int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};

#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

112
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
113
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
114
  int i, j, k, l;
115
116
117
118
119
120
121
122
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
123
            vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
124
                            vp9_coef_tree);
125
#if CONFIG_BALANCED_COEFTREE
126
127
128
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            probs[1] = 1;
129
            vp9_cost_tokens((int *)c[t][i][j][1][k][l], probs, vp9_coef_tree);
130
#else
131
            vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
132
                                 vp9_coef_tree);
133
134
            assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
135
#endif
136
          }
137
138
}

139
140
141
142
143
144
// Adjustment factors (in 1/16ths) applied to RDMULT in
// vp9_initialize_rd_consts(); that function indexes the table with
// twopass.next_iiratio, using entry 31 for any larger value.
// Only the first five entries are non-zero; entries 5..31 are left to the
// implicit zero-initialization of the remaining array elements.
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1,
};
John Koleszar's avatar
John Koleszar committed
145

146
// 3* dc_qlookup[Q]*dc_qlookup[Q];
147

148
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
149
150
151
// SAD-per-bit lookup tables, one entry per quantizer index. They are filled
// by vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

// Populates sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer
// index in [0, QINDEX_RANGE). Must run before vp9_initialize_me_consts()
// reads the tables.
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    // Both tables are linear in the same q value; convert it once.
    const double q = vp9_convert_qindex_to_q(i);

    sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
164

165
// Returns the base rate-distortion multiplier for a quantizer index:
// (11 * q * q) >> 2, where q = vp9_dc_quant(qindex, 0).
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled_q_sq = 11 * dc_q * dc_q;

  return scaled_q_sq >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
170
171
172
// Copies the SAD-per-bit values for the given quantizer index from the
// lookup tables into the encoder's macroblock state. qindex is used directly
// as a table index and is not range-checked here.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

175

Dmitry Kovalev's avatar
Dmitry Kovalev committed
176
// Sets up the rate-distortion constants on cpi for the given quantizer index:
//   - RDMULT / RDDIV and mb.errorperbit,
//   - the per-block-size, per-mode rd thresholds and their baseline copies,
//   - the rd threshold frequency factors,
//   - token, partition and mode cost tables,
//   - on non-key frames, the motion vector and inter mode cost tables.
// qindex is clamped to [0, MAXQ] before use. vp9_set_speed_features() is
// invoked part-way through, after RDMULT and errorperbit have been set.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int thresh_q, bs, mode, ctx;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && cpi->common.frame_type != KEY_FRAME) {
    // Second pass, non-key frame: boost RDMULT by iifactor/16. Ratios above
    // 31 use the last table entry.
    const int iifactor = (cpi->twopass.next_iiratio > 31)
                             ? rd_iifactor[31]
                             : rd_iifactor[cpi->twopass.next_iiratio];

    cpi->RDMULT += (cpi->RDMULT * iifactor) >> 4;
  }

  // errorperbit is RDMULT >> 6, bumped to 1 if that comes out as 0.
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  if (cpi->mb.errorperbit == 0)
    cpi->mb.errorperbit = 1;

  vp9_set_speed_features(cpi);

  // Quantizer-derived scale for the rd thresholds, with a floor of 8.
  thresh_q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  thresh_q <<= 2;
  thresh_q = (thresh_q < 8) ? 8 : thresh_q;

  if (cpi->RDMULT > 1000) {
    // Large multiplier: fold a factor of 100 out of RDMULT and out of the
    // thresholds.
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (bs = 0; bs < BLOCK_SIZE_TYPES; ++bs) {
      const int size_factor = rd_thresh_block_size_factor[bs];
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[]
      const int thresh_max = INT_MAX / (thresh_q * size_factor);

      for (mode = 0; mode < MAX_MODES; ++mode) {
        // *4 relates to the scaling of rd_thresh_block_size_factor[]
        if ((int64_t)cpi->sf.thresh_mult[mode] < thresh_max) {
          cpi->rd_threshes[bs][mode] =
              cpi->sf.thresh_mult[mode] * thresh_q * size_factor / (4 * 100);
        } else {
          cpi->rd_threshes[bs][mode] = INT_MAX;
        }
        cpi->rd_baseline_thresh[bs][mode] = cpi->rd_threshes[bs][mode];

        cpi->rd_thresh_freq_fact[bs][mode] =
            cpi->sf.adaptive_rd_thresh ? MAX_RD_THRESH_FREQ_FACT
                                       : BASE_RD_THRESH_FREQ_FACT;
      }
    }
  } else {
    cpi->RDDIV = 100;

    for (bs = 0; bs < BLOCK_SIZE_TYPES; ++bs) {
      const int size_factor = rd_thresh_block_size_factor[bs];
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[]
      const int thresh_max = INT_MAX / (thresh_q * size_factor);

      for (mode = 0; mode < MAX_MODES; ++mode) {
        // /4 relates to the scaling of rd_thresh_block_size_factor[]
        if (cpi->sf.thresh_mult[mode] < thresh_max) {
          cpi->rd_threshes[bs][mode] =
              cpi->sf.thresh_mult[mode] * thresh_q * size_factor / 4;
        } else {
          cpi->rd_threshes[bs][mode] = INT_MAX;
        }
        cpi->rd_baseline_thresh[bs][mode] = cpi->rd_threshes[bs][mode];

        cpi->rd_thresh_freq_fact[bs][mode] =
            cpi->sf.adaptive_rd_thresh ? MAX_RD_THRESH_FREQ_FACT
                                       : BASE_RD_THRESH_FREQ_FACT;
      }
    }
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (ctx = 0; ctx < NUM_PARTITION_CONTEXTS; ++ctx) {
    vp9_cost_tokens(cpi->mb.partition_cost[ctx],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][ctx],
                    vp9_partition_tree);
  }

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    const int use_hp = cpi->mb.e_mbd.allow_high_precision_mv;

    // Build the mv cost table that matches the active mv precision.
    vp9_build_nmv_cost_table(cpi->mb.nmvjointcost,
                             use_hp ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
                             &cpi->common.fc.nmvc,
                             use_hp, 1, 1);

    for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++) {
        cpi->mb.inter_mode_cost[ctx][m - NEARESTMV] =
            cost_token(vp9_inter_mode_tree,
                       cpi->common.fc.inter_mode_probs[ctx],
                       vp9_inter_mode_encodings - NEARESTMV + m);
      }
    }
  }
}

287
288
// Maps a (width, height) pair of log2 block dimensions to a block size type
// via bsize_from_dim_lookup[][]. Callers in this file pass the results of
// plane_block_width_log2by4() / plane_block_height_log2by4().
static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) {
  const BLOCK_SIZE_TYPE size = bsize_from_dim_lookup[bwl][bhl];

  return size;
}

291
292
293
294
// Returns the block size type of plane pd for a block of size bsize, derived
// from the plane's log2 width and height.
static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize,
                                            struct macroblockd_plane *pd) {
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);

  return get_block_size(bwl, bhl);
}

Yaowu Xu's avatar
Yaowu Xu committed
297
// Linearly interpolates two tables of ntab entries at the same position.
//   x        - lookup position; table entry i corresponds to x = i / inv_step.
//   ntab     - number of entries in each of tab1 and tab2.
//   inv_step - reciprocal of the spacing between table entries.
//   v1, v2   - receive the interpolated values from tab1 and tab2.
// Positions at or beyond the last entry return the last entry of each table.
// There is no lower bound check: the caller must pass a non-negative x.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double pos = x * inv_step;
  const int idx = (int)pos;
  const int last = ntab - 1;
  double frac;

  if (idx >= last) {
    *v1 = tab1[last];
    *v2 = tab2[last];
    return;
  }

  // Fractional distance from entry idx towards entry idx + 1.
  frac = pos - idx;
  *v1 = tab1[idx] * (1 - frac) + tab1[idx + 1] * frac;
  *v2 = tab2[idx] * (1 - frac) + tab2[idx + 1] * frac;
}

312
313
314
315
316
// Looks up the normalized rate (*R) and normalized distortion (*D) of the
// model for a given x = qpstep / sqrt(variance), by linear interpolation in
// two tables sampled every 1/8 in x. x must be non-negative; values beyond
// the end of the tables return the last table entry.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  // Take the table length from the table itself so the two can never drift
  // apart (both tables currently hold 120 entries).
  const int tab_size = (int)(sizeof(rate_tab) / sizeof(rate_tab[0]));

  // Both tables are indexed with the same position, so they must match.
  assert(sizeof(rate_tab) == sizeof(dist_tab));
  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

// Models the rate and distortion for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The closed
// form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
//   var   - summed error measure over the block (callers pass an SSE).
//   n     - number of samples the sum covers.
//   qstep - quantizer step size.
//   rate  - receives (n << 8) * Rn, rounded to nearest.
//   dist  - receives var * Dn, rounded to nearest.
// A zero var or n yields zero rate and distortion.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();
  if (var != 0 && n != 0) {
    double norm_rate, norm_dist;
    const double per_sample_var = (double)var / n;
    const double x = qstep / sqrt(per_sample_var);

    model_rd_norm(x, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int64_t)(var * norm_dist + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }
  vp9_clear_system_state();
}

// Estimates rate and distortion for a block of size bsize from the modeled
// statistics of the prediction error, summed over all MAX_MB_PLANE planes.
//   out_rate_sum - receives the summed modeled rate.
//   out_dist_sum - receives the summed modeled distortion, shifted left by 4.
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0;
  // Accumulate in 64 bits: the per-plane distortion is int64_t and the sum
  // is scaled by 16 below, which does not fit in int for large blocks.
  int64_t dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];

    // TODO(dkovalev) the same code in get_plane_block_size
    const int bwl = plane_block_width_log2by4(bsize, pd);
    const int bhl = plane_block_height_log2by4(bsize, pd);
    const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

431
432
433
434
435
436
437
438
439
440
// Estimates rate and distortion for plane 0 only of a block of size bsize,
// from the modeled statistics of the prediction error.
//   out_rate_sum - receives the modeled rate.
//   out_dist_sum - receives the modeled distortion, shifted left by 4.
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum) {
  struct macroblock_plane *const src_plane = &x->plane[0];
  struct macroblockd_plane *const dst_plane = &xd->plane[0];

  // TODO(dkovalev) the same code in get_plane_block_size
  const int bwl = plane_block_width_log2by4(bsize, dst_plane);
  const int bhl = plane_block_height_log2by4(bsize, dst_plane);
  const BLOCK_SIZE_TYPE plane_bsize = get_block_size(bwl, bhl);
  const int num_samples = 16 << (bwl + bhl);
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  const int qstep = dst_plane->dequant[1] >> 3;
  unsigned int sse;
  int rate;
  int64_t dist;

  // Only the sse output is used; the returned variance is discarded.
  // sse works better than var, since there is no dc prediction used
  (void) cpi->fn_ptr[plane_bsize].vf(src_plane->src.buf,
                                     src_plane->src.stride,
                                     dst_plane->dst.buf,
                                     dst_plane->dst.stride, &sse);
  model_rd_from_var_lapndz(sse, num_samples, qstep, &rate, &dist);

  *out_rate_sum = rate;
  *out_dist_sum = dist << 4;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
457
458
459
460
461
// Estimates rate and distortion for plane 0 of a block of size bsize by
// modeling each tx_size x tx_size sub-block separately and summing.
//   out_rate_sum - receives the summed modeled rate.
//   out_dist_sum - receives the summed modeled distortion, shifted left by 4.
//   out_skip     - set to 1 only if every sub-block's modeled rate is < 1024.
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int tx_dim = 4, row, col;
  BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);
  const int bw = 4 << bwl;
  const int bh = 4 << bhl;
  int rate_sum = 0;
  int64_t dist_sum = 0;

  // Pick the variance function index and pixel dimension for the transform.
  switch (tx_size) {
    case TX_4X4:
      bs = BLOCK_4X4;
      tx_dim = 4;
      break;
    case TX_8X8:
      bs = BLOCK_8X8;
      tx_dim = 8;
      break;
    case TX_16X16:
      bs = BLOCK_16X16;
      tx_dim = 16;
      break;
    case TX_32X32:
      bs = BLOCK_32X32;
      tx_dim = 32;
      break;
    default:
      assert(0);
      break;
  }
  assert(bs <= get_block_size(bwl, bhl));

  *out_skip = 1;
  for (row = 0; row < bh; row += tx_dim) {
    for (col = 0; col < bw; col += tx_dim) {
      int rate;
      int64_t dist;
      unsigned int sse;

      (void) cpi->fn_ptr[bs].vf(p->src.buf + row * p->src.stride + col,
                                p->src.stride,
                                pd->dst.buf + row * pd->dst.stride + col,
                                pd->dst.stride, &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, tx_dim * tx_dim, pd->dequant[1] >> 3,
                               &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      // A single sub-block at or above the rate limit clears the skip flag.
      *out_skip &= (rate < 1024);
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = (dist_sum << 4);
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
511
// Computes the squared error between original and dequantized coefficients.
//   coeff      - original transform coefficients.
//   dqcoeff    - dequantized coefficients.
//   block_size - number of coefficients to process.
//   ssz        - receives the sum of squares of coeff[].
// Returns the sum of squared differences (coeff[i] - dqcoeff[i])^2.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t sum_sq_diff = 0;
  int64_t sum_sq_coeff = 0;
  int idx = 0;

  while (idx < block_size) {
    const int orig = coeff[idx];
    const int delta = orig - dqcoeff[idx];

    // The products are formed in unsigned arithmetic: the square of a
    // 16-bit difference can exceed INT_MAX but always fits in 32 bits.
    sum_sq_diff += (unsigned)delta * delta;
    sum_sq_coeff += (unsigned)orig * orig;
    ++idx;
  }

  *ssz = sum_sq_coeff;
  return sum_sq_diff;
}

526
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
527
                              int plane, int block, PLANE_TYPE type,
528
529
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
530
531
                              TX_SIZE tx_size,
                              int y_blocks) {
532
  MACROBLOCKD *const xd = &mb->e_mbd;
533
534
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
535
  int c = 0;
536
  int cost = 0;
537
  const int16_t *scan = NULL, *nb;
538
  const int eob = xd->plane[plane].eobs[block];
539
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
540
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
541
542
  unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
543
  ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
544
  TX_TYPE tx_type = DCT_DCT;
545
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
546
  int seg_eob = 0;
547
  uint8_t token_cache[1024];
548
  const uint8_t *band_translate = NULL;
549
550

  // Check for consistency of tx_size with mode info
551
  assert((!type && !plane) || (type && plane));
552
553
554
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
555
    assert(tx_size == get_uv_tx_size(mbmi));
556
557
  }

558
  switch (tx_size) {
559
    case TX_4X4: {
560
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
561
          get_tx_type_4x4(xd, block) : DCT_DCT;
562
563
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
564
      seg_eob = 16;
565
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
566
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
567
      break;
568
    }
569
    case TX_8X8: {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
570
571
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
                                  get_tx_type_8x8(xd) : DCT_DCT;
572
573
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
574
      scan = get_scan_8x8(tx_type);
575
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
576
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
577
      break;
578
579
    }
    case TX_16X16: {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
580
581
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
                                  get_tx_type_16x16(xd) : DCT_DCT;
582
      scan = get_scan_16x16(tx_type);
583
      seg_eob = 256;
584
585
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
586
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
587
      break;
588
    }
589
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
590
      scan = vp9_default_scan_32x32;
591
      seg_eob = 1024;
592
593
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
594
      band_translate = vp9_coefband_trans_8x8plus;
595
      break;
Daniel Kang's avatar
Daniel Kang committed
596
    default:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
597
      assert(0);
Daniel Kang's avatar
Daniel Kang committed
598
599
      break;
  }
John Koleszar's avatar
John Koleszar committed
600
  assert(eob <= seg_eob);
601

602
  pt = combine_entropy_contexts(above_ec, left_ec);
603
  nb = vp9_get_coef_neighbors_handle(scan);
604

605
  if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP))
606
    seg_eob = 0;
607

608
609
610
611
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

612
613
614
615
  if (eob == 0) {
    // single eob token
    cost += token_costs[0][0][pt][DCT_EOB_TOKEN];
  } else {
616
    int v, prev_t;
617
618
619

    // dc token
    v = qcoeff_ptr[0];
620
621
622
    prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost += token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
623
624
625
626

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
627
628
      const int band = get_coef_band(band_translate, c);
      int t;
629
630
631

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
632
      pt = get_coef_context(nb, token_cache, c);
633
      cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
634
      token_cache[rc] = vp9_pt_energy_class[t];
635
      prev_t = t;
636
    }
637
638

    // eob token
639
    if (c < seg_eob) {
640
      pt = get_coef_context(nb, token_cache, c);
641
642
      cost += token_costs[0][get_coef_band(band_translate, c)][pt]
                         [DCT_EOB_TOKEN];
643
    }
644
645
  }

646
647
648
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
649
  }
650

651
652
653
  return cost;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
// Working state passed (as the void *arg) to the per-transform-block
// callbacks dist_block() and rate_block().
// NOTE: rdcost_plane() initializes this struct positionally, so the order of
// the fields must not change.
struct rdcost_block_args {
  VP9_COMMON *cm;               // forwarded to cost_coeffs()
  MACROBLOCK *x;                // macroblock being evaluated
  ENTROPY_CONTEXT t_above[16];  // working copy of the above entropy context
  ENTROPY_CONTEXT t_left[16];   // working copy of the left entropy context
  TX_SIZE tx_size;              // transform size used for the evaluation
  int bw;                       // plane block width, set by rdcost_plane()
  int bh;                       // plane block height, set by rdcost_plane()
  int rate;                     // accumulated by rate_block()
  int64_t dist;                 // accumulated by dist_block()
  int64_t sse;                  // accumulated by dist_block()
  int64_t best_rd;              // rdcost_plane() sets this to INT64_MAX
  int skip;                     // rdcost_plane() sets this to 0
};

// Per-transform-block callback: adds this block's coefficient error and
// sum of squared coefficients to args->dist and args->sse. Both are shifted
// right by 2, except for TX_32X32 where no shift is applied.
//   block         - block index, in units of 16 coefficients.
//   ss_txfrm_size - log2 of the number of 16-coefficient units in the block.
//   arg           - struct rdcost_block_args *.
// NOTE(review): plane 0 is used regardless of the plane argument, and bsize
// is unused - confirm this callback is only meant for the first plane.
static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int shift = (args->tx_size == TX_32X32) ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block, 16);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16);
  int64_t block_sse;
  const int64_t block_err = vp9_block_error(coeff, dqcoeff,
                                            16 << ss_txfrm_size, &block_sse);

  args->dist += block_err >> shift;
  args->sse += block_sse >> shift;

  if (x->skip_encode &&
      xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    const int64_t quant_penalty =
        (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> shift;

    args->dist += quant_penalty;
    args->sse += quant_penalty;
  }
}

// Per-transform-block callback: adds the coefficient cost of `block` to
// args->rate. The block index is first converted to raster x/y offsets, which
// select the entries of the working above/left contexts given to cost_coeffs().
static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCKD *const xd = &args->x->e_mbd;
  ENTROPY_CONTEXT *above;
  ENTROPY_CONTEXT *left;
  int col, row;

  txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2,
                           &col, &row);
  above = args->t_above + col;
  left = args->t_left + row;

  args->rate += cost_coeffs(args->cm, args->x, plane, block,
                            xd->plane[plane].plane_type, above, left,
                            args->tx_size, args->bw * args->bh);
}

709
710
// FIXME(jingning): make the RD test of the chroma components consistent with
// that of the luma component. This function should be deprecated afterwards.
Deb Mukherjee's avatar
Deb Mukherjee committed
711
712
713
714
715
716
717
// Returns the coefficient rate of one plane of the block `bsize` for the
// transform size `tx_size`. rate_block() is run over every transformed block
// of the plane, using private copies of the plane's entropy contexts.
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int bw = 1 << (b_width_log2(bsize) - pd->subsampling_x);
  const int bh = 1 << (b_height_log2(bsize) - pd->subsampling_y);
  // Positional initializer: cm, x, t_above, t_left, tx_size, bw, bh,
  // rate, dist, sse, best_rd, skip.
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
                                    0, 0, 0, INT64_MAX, 0 };

  vpx_memcpy(args.t_above, pd->above_context,
             sizeof(args.t_above[0]) * bw);
  vpx_memcpy(args.t_left, pd->left_context,
             sizeof(args.t_left[0]) * bh);

  foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);

  return args.rate;
}

// Sums rdcost_plane() over planes 1 .. MAX_MB_PLANE - 1.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total;
}

// Computes the block error between the plane 0 coefficients and their
// dequantized counterparts over the whole block `bsize`.
//
// Returns the error shifted right by `shift`; `*sse` receives the SSE reported
// by vp9_block_error(), shifted the same way.
//
// The return type is int64_t (it used to be int): the error is accumulated in
// 64 bits and narrowing it on return could silently corrupt large values. This
// also matches block_error_sbuv().
static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                               int shift, int64_t *sse) {
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);
  const int64_t error = vp9_block_error(x->plane[0].coeff, pd->dqcoeff,
                                        16 << (bwl + bhl), sse) >> shift;

  *sse >>= shift;
  return error;
}

// Accumulates vp9_block_error() over planes 1 .. MAX_MB_PLANE - 1.
// Returns the summed error shifted right by `shift`; `*sse` receives the
// summed SSE shifted the same way.
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                                int shift, int64_t *sse) {
  int64_t total_error = 0;
  int64_t total_sse = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
    const int bwl = plane_block_width_log2by4(bsize, pd);
    const int bhl = plane_block_height_log2by4(bsize, pd);
    int64_t plane_sse;

    total_error += vp9_block_error(x->plane[plane].coeff, pd->dqcoeff,
                                   16 << (bwl + bhl), &plane_sse);
    total_sse += plane_sse;
  }

  *sse = total_sse >> shift;
  return total_error >> shift;
}

// Per-transform-block callback used for the luma RD search. Unless the search
// was already abandoned, it checks the running cost against args->best_rd,
// then codes the block and adds its distortion and rate to the totals.
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = { args->cm, x, NULL };
  int64_t rd_with_rate, rd_zero_rate;

  // A previous block already exceeded the limit: nothing left to do.
  if (args->skip)
    return;

  // Cost so far, either with the accumulated rate and distortion, or with zero
  // rate and the accumulated SSE as distortion. Give up when even the cheaper
  // of the two is above the limit.
  rd_with_rate = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd_zero_rate = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
  if (MIN(rd_with_rate, rd_zero_rate) > args->best_rd) {
    args->skip = 1;
    args->rate = INT_MAX;
    args->dist = INT64_MAX;
    args->sse  = INT64_MAX;
    return;
  }

  if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    encode_block_intra(plane, block, bsize, ss_txfrm_size, &encode_args);
  } else {
    xform_quant(plane, block, bsize, ss_txfrm_size, &encode_args);
  }

  dist_block(plane, block, bsize, ss_txfrm_size, args);
  rate_block(plane, block, bsize, ss_txfrm_size, args);
}

static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skippable, int64_t *sse,
801
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
802
803
804
805
806
807
808
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int bwl = b_width_log2(bsize) - xd->plane[0].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[0].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
809
                                    0, 0, 0, ref_best_rd, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
810
811
812
813
814
815
816
817
  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vpx_memcpy(&args.t_above, pd->above_context, sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&args.t_left, pd->left_context, sizeof(ENTROPY_CONTEXT) * bh);

  foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args);
  *distortion = args.dist;
  *rate       = args.rate;
  *sse        = args.sse;
818
  *skippable  = vp9_sby_is_skippable(xd, bsize) && (!args.skip);
Deb Mukherjee's avatar
Deb Mukherjee committed
819
820
821
822
823
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
824
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
825
826
827
828
829
830
831
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  if (max_txfm_size == TX_32X32 &&
832
833
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
834
835
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
836
837
838
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
839
    mbmi->txfm_size = TX_16X16;
840
  } else if (cm->tx_mode != ONLY_4X4) {
Deb Mukherjee's avatar
Deb Mukherjee committed
841
842
843
844
845
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }
  super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
846
                           &sse[mbmi->txfm_size], ref_best_rd, bs,
Deb Mukherjee's avatar
Deb Mukherjee committed
847
848
849
850
                           mbmi->txfm_size);
  cpi->txfm_stepdown_count[0]++;
}

851
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
852
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
853
                                     int64_t *d, int64_t *distortion,
854
855
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
Deb Mukherjee's avatar
Deb Mukherjee committed
856
857
858
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
859
860
861
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
862
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
863
864
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
865
  int s0, s1;
866

867
  const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(xd, &cm->fc.tx_probs);
868

869
870
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
871
872
    if (r[n][0] == INT_MAX)
      continue;
873
874
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
875
        r[n][1] += vp9_cost_zero(tx_probs[m]);
876
      else
877
        r[n][1] += vp9_cost_one(tx_probs[m]);
878
879
    }
  }
880

881
882
883
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
884

885
  for (n = TX_4X4; n <= max_txfm_size; n++) {
886
887
888
889
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
890
891
892
893
894
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
895
896
897
    }
  }

898
  if (max_txfm_size == TX_32X32 &&
899
900
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
901
902
903
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
904
  } else if (max_txfm_size >= TX_16X16 &&
905
906
907
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
908
909
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
910
    mbmi->txfm_size = TX_16X16;
911
912
913
914
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
915
916
917
918
919
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

920
  *distortion = d[mbmi->txfm_size];
921
  *rate       = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
922
923
  *skip       = s[mbmi->txfm_size];

924
925
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
926
927
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
928
929
930
931
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
932
933
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
934
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
935
  else
936
937
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
938

Deb Mukherjee's avatar
Deb Mukherjee committed
939
940
941
942
943
944
945
946
947
948
949
950
951
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
  } else {
    cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
952
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
953
}
954

Deb Mukherjee's avatar
Deb Mukherjee committed
955
956
957
958
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
959
                                          int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
960
961
962
963
964
965
966
                                          BLOCK_SIZE_TYPE bs,
                                          int *model_used) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
967
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
Deb Mukherjee's avatar
Deb Mukherjee committed
968
969
970
971
972
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZE_MAX_SB] = {2.82, 2.00, 1.41, 1.00};
973

974
  const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(xd, &cm->fc.tx_probs);
975

Deb Mukherjee's avatar
Deb Mukherjee committed
976
977
  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);
978

Deb Mukherjee's avatar
Deb Mukherjee committed
979
980
981
982
983
984
985
986
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
987
  }
988

Deb Mukherjee's avatar
Deb Mukherjee committed
989
990
991
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
992

Deb Mukherjee's avatar
Deb Mukherjee committed
993
994
995
996
997
998
999
1000
1001
1002