vp9_rdopt.c 153 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
50
51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
  {NEARESTMV, LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
57
58
59
60
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
  {NEWMV,     LAST_FRAME,   NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
61
  {NEARMV,    LAST_FRAME,   NONE},
Paul Wilkins's avatar
Paul Wilkins committed
62
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
63

Yaowu Xu's avatar
Yaowu Xu committed
64
65
  {DC_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
66
67
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
68
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
69

70
71
  {TM_PRED,   INTRA_FRAME,  NONE},

Paul Wilkins's avatar
Paul Wilkins committed
72
73
74
75
76
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
77

78
79
80
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
Paul Wilkins's avatar
Paul Wilkins committed
81
82
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
83

Paul Wilkins's avatar
Paul Wilkins committed
84
85
86
  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
Ronald S. Bultje's avatar
Ronald S. Bultje committed
87
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
88
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
89

Paul Wilkins's avatar
Paul Wilkins committed
90
91
92
93
94
95
96
97
98
  {I4X4_PRED, INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
99
100
};

101
102
103
104
105
106
107
108
109
110
111
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
// One entry per block size. vp9_initialize_rd_consts() multiplies each
// per-mode threshold by the entry for the block size and divides the
// product by 4 (or by 4 * 100 when RDMULT has been rescaled).
static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

112
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
113
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
114
  int i, j, k, l;
115
116
117
118
119
120
121
122
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
123
            vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
124
                            vp9_coef_tree);
125
#if CONFIG_BALANCED_COEFTREE
126
127
128
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            probs[1] = 1;
129
            vp9_cost_tokens((int *)c[t][i][j][1][k][l], probs, vp9_coef_tree);
130
#else
131
            vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
132
                                 vp9_coef_tree);
133
134
            assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
135
#endif
136
          }
137
138
}

139
140
141
142
// RDMULT boost factors in 1/16 units. vp9_initialize_rd_consts() indexes this
// table with cpi->twopass.next_iiratio (values above 31 use the last entry)
// and, on non-key frames in pass 2, adds (RDMULT * factor) >> 4 to RDMULT.
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
143

144
// 3* dc_qlookup[Q]*dc_qlookup[Q];
145

146
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
147
148
149
// Per-quantizer-index lookup tables. They are filled by vp9_init_me_luts()
// and copied into cpi->mb.sadperbit16 / cpi->mb.sadperbit4 by
// vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

150
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
151
152
153
154
155
156
157
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
158
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
159
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
160
  }
Paul Wilkins's avatar
Paul Wilkins committed
161
}
John Koleszar's avatar
John Koleszar committed
162

163
// Returns the rate-distortion multiplier for a quantizer index:
// 11/4 times the square of the DC quantizer value, rounded down.
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int dc_q_squared = dc_q * dc_q;

  return (11 * dc_q_squared) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
168
169
170
// Loads the SAD-per-bit constants for the given quantizer index from the
// lookup tables into the encoder's macroblock state.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

173

Dmitry Kovalev's avatar
Dmitry Kovalev committed
174
// Sets up the quantizer-dependent rate-distortion constants held in cpi:
//   - RDMULT / RDDIV and the error-per-bit value,
//   - the per-block-size, per-mode RD thresholds and their frequency factors,
//   - coefficient token, partition, mode and (on non-key frames) motion
//     vector and inter-mode cost tables.
// qindex is clamped to [0, MAXQ] before use. The speed features are
// refreshed through vp9_set_speed_features() before the thresholds are
// derived from cpi->sf.thresh_mult[].
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  int q, i, bsize;
  int thresh_div;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cm->frame_type != KEY_FRAME)) {
    // Boost RDMULT by iifactor / 16; ratios above 31 use the last entry.
    const int iifactor = (cpi->twopass.next_iiratio > 31)
                             ? rd_iifactor[31]
                             : rd_iifactor[cpi->twopass.next_iiratio];
    cpi->RDMULT += (cpi->RDMULT * iifactor) >> 4;
  }

  // Error per bit is RDMULT / 64, but never zero.
  x->errorperbit = cpi->RDMULT >> 6;
  if (x->errorperbit == 0)
    x->errorperbit = 1;

  vp9_set_speed_features(cpi);

  q = ((int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25)) << 2;
  if (q < 8)
    q = 8;

  // For large multipliers, RDMULT is scaled down by 100 with RDDIV set to 1,
  // and the thresholds are scaled down by the same factor of 100. The 4 in
  // the divisor relates to the scaling of rd_thresh_block_size_factor[].
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
    thresh_div = 4 * 100;
  } else {
    cpi->RDDIV = 100;
    thresh_div = 4;
  }

  for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
    // Threshold here seems unnecessarily harsh but fine given the actual
    // range of values used for cpi->sf.thresh_mult[]
    const int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

    for (i = 0; i < MAX_MODES; ++i) {
      if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
        cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q *
            rd_thresh_block_size_factor[bsize] / thresh_div;
      } else {
        cpi->rd_threshes[bsize][i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];

      cpi->rd_thresh_freq_fact[bsize][i] = cpi->sf.adaptive_rd_thresh
                                               ? MAX_RD_THRESH_FREQ_FACT
                                               : BASE_RD_THRESH_FREQ_FACT;
    }
  }

  fill_token_costs(x->token_costs, cm->fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) {
    vp9_cost_tokens(x->partition_cost[i],
                    cm->fc.partition_prob[cm->frame_type][i],
                    vp9_partition_tree);
  }

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cm->frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        x->nmvjointcost,
        x->e_mbd.allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
        &cm->fc.nmvc,
        x->e_mbd.allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++) {
        x->inter_mode_cost[i][m - NEARESTMV] =
            cost_token(vp9_inter_mode_tree,
                       cm->fc.inter_mode_probs[i],
                       vp9_inter_mode_encodings - NEARESTMV + m);
      }
    }
  }
}

285
286
// Looks up the block size type for the given width and height indices
// (bwl, bhl) in bsize_from_dim_lookup[][].
static INLINE BLOCK_SIZE_TYPE get_block_size(int bwl, int bhl) {
  const BLOCK_SIZE_TYPE bsize = bsize_from_dim_lookup[bwl][bhl];
  return bsize;
}

289
290
291
292
// Returns the block size type that bsize corresponds to within plane pd,
// using the plane's width/height log2 (in 4-sample units) as lookup indices.
static BLOCK_SIZE_TYPE get_plane_block_size(BLOCK_SIZE_TYPE bsize,
                                            struct macroblockd_plane *pd) {
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);

  return get_block_size(bwl, bhl);
}

Yaowu Xu's avatar
Yaowu Xu committed
295
// Linearly interpolates two tables of ntab entries at the same position.
//
// The fractional table position is x * inv_step. Positions at or beyond the
// last entry return the last entry of each table; otherwise the two
// neighbouring entries are blended by the fractional part of the position.
// Results are written to *v1 (from tab1) and *v2 (from tab2).
// The caller must pass x >= 0.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double pos = x * inv_step;
  const int idx = (int)pos;

  if (idx < ntab - 1) {
    const double frac = pos - idx;

    *v1 = tab1[idx] * (1 - frac) + tab1[idx + 1] * frac;
    *v2 = tab2[idx] * (1 - frac) + tab2[idx + 1] * frac;
  } else {
    *v1 = tab1[ntab - 1];
    *v2 = tab2[ntab - 1];
  }
}

310
311
312
313
314
// Evaluates the normalized rate (*R) and normalized distortion (*D) model at
// x = qpstep / sqrt(variance) by linear interpolation of two tables sampled
// at steps of 1 / inv_tab_step in x. x must be non-negative.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size (tab_size entries each).
  //
  // Normalized rate
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The closed
  // form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source with
  // given variance when quantized with a uniform quantizer with given
  // stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };

  // Size checks on the two tables are intentionally not compiled in:
  //   sizeof(rate_tab) == tab_size * sizeof(rate_tab[0])
  //   sizeof(dist_tab) == tab_size * sizeof(dist_tab[0])
  //   sizeof(rate_tab) == sizeof(dist_tab)
  assert(x >= 0.0);

  linear_interpolate2(x, tab_size, inv_tab_step, rate_tab, dist_tab, R, D);
}

// Models the rate and distortion for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The closed
// form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
// var is the summed squared error over n samples and qstep the quantizer
// step. *rate receives n * 256 times the normalized rate and *dist receives
// var times the normalized distortion, both rounded to nearest. Both outputs
// are 0 when var or n is 0.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();

  if (var != 0 && n != 0) {
    double norm_dist, norm_rate;
    const double per_sample_var = (double)var / n;
    const double x = qstep / sqrt(per_sample_var);

    model_rd_norm(x, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int64_t)(var * norm_dist + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }

  vp9_clear_system_state();
}

// Estimates rate and distortion of the current prediction for a block of
// size bsize from the prediction error of every plane, without transforming
// or quantizing.
//
// For each plane the sum of squared errors between source and prediction is
// fed to the Laplacian model together with the plane's sample count and
// effective quantizer step. *out_rate_sum receives the summed modelled rate
// and *out_dist_sum the summed modelled distortion scaled by 16.
//
// The distortion is accumulated in 64 bits: the per-plane value is int64_t
// and the final << 4 would overflow a 32-bit int for large blocks with high
// error.
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0;
  int64_t dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];

    // TODO(dkovalev) the same code in get_plane_block_size
    const int bwl = plane_block_width_log2by4(bsize, pd);
    const int bhl = plane_block_height_log2by4(bsize, pd);
    const BLOCK_SIZE_TYPE bs = get_block_size(bwl, bhl);
    unsigned int sse;
    int rate;
    int64_t dist;

    // Only the sse output is used; the returned variance is discarded.
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, 16 << (bwl + bhl),
                             pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

429
430
431
432
433
434
435
436
437
438
// Luma-only variant of the model-based rate/distortion estimate: only
// plane 0 is measured. *out_rate_sum receives the modelled rate and
// *out_dist_sum the modelled distortion scaled by 16.
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum) {
  struct macroblock_plane *const src_plane = &x->plane[0];
  struct macroblockd_plane *const pred_plane = &xd->plane[0];

  // TODO(dkovalev) the same code in get_plane_block_size
  const int bwl = plane_block_width_log2by4(bsize, pred_plane);
  const int bhl = plane_block_height_log2by4(bsize, pred_plane);
  const BLOCK_SIZE_TYPE plane_bsize = get_block_size(bwl, bhl);
  const int num_samples = 16 << (bwl + bhl);
  unsigned int sse;
  int64_t dist;
  int rate;

  // Only the sse output is used; the returned variance is discarded.
  (void) cpi->fn_ptr[plane_bsize].vf(src_plane->src.buf,
                                     src_plane->src.stride,
                                     pred_plane->dst.buf,
                                     pred_plane->dst.stride, &sse);

  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  // sse works better than var, since there is no dc prediction used
  model_rd_from_var_lapndz(sse, num_samples, pred_plane->dequant[1] >> 3,
                           &rate, &dist);

  *out_rate_sum = rate;
  *out_dist_sum = dist << 4;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
455
456
457
458
459
// Model-based luma rate/distortion estimate evaluated per transform block.
//
// The luma block is tiled with squares of the given transform size; the
// prediction error of each tile is modelled separately and the results are
// summed. *out_rate_sum receives the summed rate, *out_dist_sum the summed
// distortion scaled by 16, and *out_skip is 1 only when every tile's
// modelled rate is below 1024.
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);
  const int bw = 4 << bwl;
  const int bh = 4 << bhl;
  BLOCK_SIZE_TYPE bs = BLOCK_SIZE_AB4X4;
  int t = 4;  // tile edge length in samples
  int row, col;
  int rate_sum = 0;
  int64_t dist_sum = 0;

  // Map the transform size to the variance function index and tile size.
  switch (tx_size) {
    case TX_4X4:
      bs = BLOCK_4X4;
      t = 4;
      break;
    case TX_8X8:
      bs = BLOCK_8X8;
      t = 8;
      break;
    case TX_16X16:
      bs = BLOCK_16X16;
      t = 16;
      break;
    case TX_32X32:
      bs = BLOCK_32X32;
      t = 32;
      break;
    default:
      assert(0);
      break;
  }
  assert(bs <= get_block_size(bwl, bhl));

  *out_skip = 1;
  for (row = 0; row < bh; row += t) {
    for (col = 0; col < bw; col += t) {
      unsigned int sse;
      int64_t dist;
      int rate;

      // Only the sse output is used; the returned variance is discarded.
      (void) cpi->fn_ptr[bs].vf(p->src.buf + row * p->src.stride + col,
                                p->src.stride,
                                pd->dst.buf + row * pd->dst.stride + col,
                                pd->dst.stride, &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                               &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      if (rate >= 1024)
        *out_skip = 0;
    }
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = (dist_sum << 4);
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
509
// Computes the squared error between original and dequantized coefficients.
//
// Returns the sum over block_size entries of (coeff - dqcoeff)^2 and stores
// the sum of coeff^2 in *ssz. Each square is formed in unsigned arithmetic
// so the largest 16-bit difference squared still fits in 32 bits before it
// is added to the 64-bit accumulator.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t sum_sq_diff = 0;
  int64_t sum_sq_coeff = 0;
  int idx = 0;

  while (idx < block_size) {
    const int src = coeff[idx];
    const int delta = src - dqcoeff[idx];

    sum_sq_diff += (unsigned)delta * delta;
    sum_sq_coeff += (unsigned)src * src;
    ++idx;
  }

  *ssz = sum_sq_coeff;
  return sum_sq_diff;
}

524
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
525
                              int plane, int block, PLANE_TYPE type,
526
527
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
528
529
                              TX_SIZE tx_size,
                              int y_blocks) {
530
  MACROBLOCKD *const xd = &mb->e_mbd;
531
532
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
533
  int c = 0;
534
  int cost = 0;
535
  const int16_t *scan = NULL, *nb;
536
  const int eob = xd->plane[plane].eobs[block];
537
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
538
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
539
540
  unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
541
  ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
542
  TX_TYPE tx_type = DCT_DCT;
543
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
544
  int seg_eob = 0;
545
  uint8_t token_cache[1024];
546
  const uint8_t *band_translate = NULL;
547
548

  // Check for consistency of tx_size with mode info
549
  assert((!type && !plane) || (type && plane));
550
551
552
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
553
    assert(tx_size == get_uv_tx_size(mbmi));
554
555
  }

556
  switch (tx_size) {
557
    case TX_4X4: {
558
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
559
          get_tx_type_4x4(xd, block) : DCT_DCT;
560
561
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
562
      seg_eob = 16;
563
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
564
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
565
      break;
566
    }
567
    case TX_8X8: {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
568
569
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
                                  get_tx_type_8x8(xd) : DCT_DCT;
570
571
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
572
      scan = get_scan_8x8(tx_type);
573
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
574
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
575
      break;
576
577
    }
    case TX_16X16: {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
578
579
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
                                  get_tx_type_16x16(xd) : DCT_DCT;
580
      scan = get_scan_16x16(tx_type);
581
      seg_eob = 256;
582
583
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
584
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
585
      break;
586
    }
587
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
588
      scan = vp9_default_scan_32x32;
589
      seg_eob = 1024;
590
591
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
592
      band_translate = vp9_coefband_trans_8x8plus;
593
      break;
Daniel Kang's avatar
Daniel Kang committed
594
    default:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
595
      assert(0);
Daniel Kang's avatar
Daniel Kang committed
596
597
      break;
  }
John Koleszar's avatar
John Koleszar committed
598
  assert(eob <= seg_eob);
599

600
  pt = combine_entropy_contexts(above_ec, left_ec);
601
  nb = vp9_get_coef_neighbors_handle(scan);
602

603
  if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP))
604
    seg_eob = 0;
605

606
607
608
609
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

610
611
612
613
  if (eob == 0) {
    // single eob token
    cost += token_costs[0][0][pt][DCT_EOB_TOKEN];
  } else {
614
    int v, prev_t;
615
616
617

    // dc token
    v = qcoeff_ptr[0];
618
619
620
    prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost += token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
621
622
623
624

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
625
626
      const int band = get_coef_band(band_translate, c);
      int t;
627
628
629

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
630
      pt = get_coef_context(nb, token_cache, c);
631
      cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
632
      token_cache[rc] = vp9_pt_energy_class[t];
633
      prev_t = t;
634
    }
635
636

    // eob token
637
    if (c < seg_eob) {
638
      pt = get_coef_context(nb, token_cache, c);
639
640
      cost += token_costs[0][get_coef_band(band_translate, c)][pt]
                         [DCT_EOB_TOKEN];
641
    }
642
643
  }

644
645
646
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
647
  }
648

649
650
651
  return cost;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
// Argument/accumulator bundle handed to the per-transform-block callbacks
// (dist_block, rate_block) through their void *arg parameter.
// NOTE: rdcost_plane() initializes this struct positionally, so the field
// order must not be changed without updating that initializer.
struct rdcost_block_args {
  VP9_COMMON *cm;                // forwarded to cost_coeffs()
  MACROBLOCK *x;                 // encoder macroblock being evaluated
  ENTROPY_CONTEXT t_above[16];   // working copy of the above entropy context
  ENTROPY_CONTEXT t_left[16];    // working copy of the left entropy context
  TX_SIZE tx_size;               // transform size used for every block
  int bw;                        // 1 << (block width log2 - subsampling_x)
  int bh;                        // 1 << (block height log2 - subsampling_y)
  int rate;                      // accumulated by rate_block()
  int64_t dist;                  // accumulated by dist_block()
  int64_t sse;                   // accumulated by dist_block()
  int64_t best_rd;               // set to INT64_MAX by rdcost_plane()
  int skip;                      // set to 0 by rdcost_plane()
};

// Per-transform-block callback that accumulates distortion and sse into the
// struct rdcost_block_args passed through arg.
//
// The coefficient error and the source energy over (16 << ss_txfrm_size)
// coefficients are added to args->dist and args->sse, scaled down by 4
// unless the transform size is 32x32. When x->skip_encode is set and the
// block is intra, a quantizer-derived term is added to both sums.
// NOTE(review): the coefficient buffers are always taken from plane[0]; the
// plane and bsize arguments are not read here -- confirm callers only use
// this for luma.
static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block, 16);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16);
  int64_t this_sse;
  const int64_t this_error = vp9_block_error(coeff, dqcoeff,
                                             16 << ss_txfrm_size, &this_sse);

  args->dist += this_error >> shift;
  args->sse += this_sse >> shift;

  if (x->skip_encode &&
      xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    const int64_t quant_term = (pd->dequant[1] * pd->dequant[1] *
                                   (1 << ss_txfrm_size)) >> shift;
    args->dist += quant_term;
    args->sse  += quant_term;
  }
}

// Per-transform-block callback: adds the coefficient cost of one block to
// args->rate, using the working entropy contexts stored in |arg|.
static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCKD *const xd = &rd_args->x->e_mbd;
  int col = 0, row = 0;

  // Locate the block so the matching above/left context entries are used.
  txfrm_block_to_raster_xy(xd, bsize, plane, block, rd_args->tx_size * 2,
                           &col, &row);

  rd_args->rate += cost_coeffs(rd_args->cm, rd_args->x, plane, block,
                               xd->plane[plane].plane_type,
                               rd_args->t_above + col,
                               rd_args->t_left + row,
                               rd_args->tx_size,
                               rd_args->bw * rd_args->bh);
}

707
708
// FIXME(jingning): need to make the rd test of chroma components consistent
// with that of luma component. this function should be deprecated afterwards.
Deb Mukherjee's avatar
Deb Mukherjee committed
709
710
711
712
713
714
715
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD * const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
716
    0, 0, 0, INT64_MAX, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739

  vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);
  return args.rate;
}

// Returns the sum of rdcost_plane() over planes 1 .. MAX_MB_PLANE - 1.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }
  return total;
}

// Computes the coefficient-domain error of plane 0 for a block of size
// |bsize|.
//
//   shift  right shift applied to both the returned error and *sse.
//   sse    receives the SSE reported by vp9_block_error(), after shifting.
//
// Returns the shifted error. The return type is int64_t (matching
// block_error_sbuv) so the 64-bit error is not narrowed on return.
static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                               int shift, int64_t *sse) {
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
  const int bwl = plane_block_width_log2by4(bsize, pd);
  const int bhl = plane_block_height_log2by4(bsize, pd);
  const int64_t error = vp9_block_error(x->plane[0].coeff, pd->dqcoeff,
                                        16 << (bwl + bhl), sse) >> shift;

  *sse >>= shift;
  return error;
}

// Computes the combined coefficient-domain error of planes
// 1 .. MAX_MB_PLANE - 1 for a block of size |bsize|.
//
//   shift  right shift applied once to the summed error and summed SSE.
//   sse    receives the summed, shifted SSE.
//
// Returns the summed, shifted error.
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                                int shift, int64_t *sse) {
  int64_t total_error = 0;
  int64_t total_sse = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
    const int bwl = plane_block_width_log2by4(bsize, pd);
    const int bhl = plane_block_height_log2by4(bsize, pd);
    int64_t plane_sse;

    total_error += vp9_block_error(x->plane[plane].coeff, pd->dqcoeff,
                                   16 << (bwl + bhl), &plane_sse);
    total_sse += plane_sse;
  }

  *sse = total_sse >> shift;
  return total_error >> shift;
}

// Per-transform-block callback for the luma rd search. Transforms and
// quantizes one block, then adds its distortion and rate to |arg|. If the
// totals gathered so far already exceed args->best_rd, the search is
// abandoned: skip is set and rate/dist/sse are set to their maximum values.
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCK *const x = rd_args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args encode_args = {rd_args->cm, x, NULL};
  int64_t rd_coded, rd_uncoded;

  // A previous block already gave up; nothing more to do.
  if (rd_args->skip)
    return;

  // Compare the cheaper of "code the residual" and "zero rate, SSE as
  // distortion" against the budget.
  rd_coded = RDCOST(x->rdmult, x->rddiv, rd_args->rate, rd_args->dist);
  rd_uncoded = RDCOST(x->rdmult, x->rddiv, 0, rd_args->sse);
  if (MIN(rd_coded, rd_uncoded) > rd_args->best_rd) {
    rd_args->skip = 1;
    rd_args->rate = INT_MAX;
    rd_args->dist = INT64_MAX;
    rd_args->sse  = INT64_MAX;
    return;
  }

  if (xd->mode_info_context->mbmi.ref_frame[0] != INTRA_FRAME)
    xform_quant(plane, block, bsize, ss_txfrm_size, &encode_args);
  else
    encode_block_intra(plane, block, bsize, ss_txfrm_size, &encode_args);

  dist_block(plane, block, bsize, ss_txfrm_size, rd_args);
  rate_block(plane, block, bsize, ss_txfrm_size, rd_args);
}

static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skippable, int64_t *sse,
799
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
800
801
802
803
804
805
806
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int bwl = b_width_log2(bsize) - xd->plane[0].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[0].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
807
                                    0, 0, 0, ref_best_rd, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
808
809
810
811
812
813
814
815
  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vpx_memcpy(&args.t_above, pd->above_context, sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&args.t_left, pd->left_context, sizeof(ENTROPY_CONTEXT) * bh);

  foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args);
  *distortion = args.dist;
  *rate       = args.rate;
  *sse        = args.sse;
816
  *skippable  = vp9_sby_is_skippable(xd, bsize) && (!args.skip);
Deb Mukherjee's avatar
Deb Mukherjee committed
817
818
819
820
821
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
822
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
823
824
825
826
827
828
829
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  if (max_txfm_size == TX_32X32 &&
830
831
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
832
833
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
834
835
836
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
Deb Mukherjee's avatar
Deb Mukherjee committed
837
    mbmi->txfm_size = TX_16X16;
838
  } else if (cm->tx_mode != ONLY_4X4) {
Deb Mukherjee's avatar
Deb Mukherjee committed
839
840
841
842
843
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }
  super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
844
                           &sse[mbmi->txfm_size], ref_best_rd, bs,
Deb Mukherjee's avatar
Deb Mukherjee committed
845
846
847
848
                           mbmi->txfm_size);
  cpi->txfm_stepdown_count[0]++;
}

849
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
850
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
851
                                     int64_t *d, int64_t *distortion,
852
853
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
Deb Mukherjee's avatar
Deb Mukherjee committed
854
855
856
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
857
858
859
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
860
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
861
862
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
863
  int s0, s1;
864

865
  const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(xd, &cm->fc.tx_probs);
866

867
868
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
869
870
    if (r[n][0] == INT_MAX)
      continue;
871
872
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
873
        r[n][1] += vp9_cost_zero(tx_probs[m]);
874
      else
875
        r[n][1] += vp9_cost_one(tx_probs[m]);
876
877
    }
  }
878

879
880
881
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
882

883
  for (n = TX_4X4; n <= max_txfm_size; n++) {
884
885
886
887
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
888
889
890
891
892
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
893
894
895
    }
  }

896
  if (max_txfm_size == TX_32X32 &&
897
898
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
899
900
901
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
902
  } else if (max_txfm_size >= TX_16X16 &&
903
904
905
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
906
907
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
908
    mbmi->txfm_size = TX_16X16;
909
910
911
912
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
913
914
915
916
917
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

918
  *distortion = d[mbmi->txfm_size];
919
  *rate       = r[mbmi->txfm_size][cm->tx_mode == TX_MODE_SELECT];
920
921
  *skip       = s[mbmi->txfm_size];

922
923
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
924
925
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
926
927
928
929
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
930
931
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
932
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
933
  else
934
935
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
936

Deb Mukherjee's avatar
Deb Mukherjee committed
937
938
939
940
941
942
943
944
945
946
947
948
949
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
  } else {
    cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
950
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
951
}
952

Deb Mukherjee's avatar
Deb Mukherjee committed
953
954
955
956
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
957
                                          int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
958
959
960
961
962
963
964
                                          BLOCK_SIZE_TYPE bs,
                                          int *model_used) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
965
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
Deb Mukherjee's avatar
Deb Mukherjee committed
966
967
968
969
970
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;
  double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00};
  // double scale_r[TX_SIZE_MAX_SB] = {2.82, 2.00, 1.41, 1.00};
971

972
  const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(xd, &cm->fc.tx_probs);
973

Deb Mukherjee's avatar
Deb Mukherjee committed
974
975
  // for (n = TX_4X4; n <= max_txfm_size; n++)
  //   r[n][0] = (r[n][0] * scale_r[n]);
976

Deb Mukherjee's avatar
Deb Mukherjee committed
977
978
979
980
981
982
983
984
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(tx_probs[m]);
      else
        r[n][1] += vp9_cost_one(tx_probs[m]);
    }
985
  }
986

Deb Mukherjee's avatar
Deb Mukherjee committed
987
988
989
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
990

Deb Mukherjee's avatar
Deb Mukherjee committed
991
992
993
994
995
996
997
998
999
1000
1001
1002
  for (n = TX_4X4; n