vp9_rdopt.c 146 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
12
13
14
15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
50
51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
  {ZEROMV,    LAST_FRAME,   NONE},
57
58
  {DC_PRED,   INTRA_FRAME,  NONE},

59
60
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
61

62
63
64
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

65
66
67
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},

68
69
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
70

71
72
73
74
75
76
77
78
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
79

80
81
  {TM_PRED,   INTRA_FRAME,  NONE},

82
83
84
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
85

86
87
88
89
90
91
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},

  {I4X4_PRED, INTRA_FRAME,  NONE},

John Koleszar's avatar
John Koleszar committed
92
  /* compound prediction modes */
Ronald S. Bultje's avatar
Ronald S. Bultje committed
93
94
95
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
96

John Koleszar's avatar
John Koleszar committed
97
98
99
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
100

Ronald S. Bultje's avatar
Ronald S. Bultje committed
101
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
102
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
103

Ronald S. Bultje's avatar
Ronald S. Bultje committed
104
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
105
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
106
107
};

108
109
110
111
112
113
114
115
116
117
118
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are scaled by 4, i.e. << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

119
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
120
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
121
  int i, j, k, l;
122
123
124
125
126
127
128
129
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
130
            vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
131
                            vp9_coef_tree);
132
#if CONFIG_BALANCED_COEFTREE
133
134
135
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            probs[1] = 1;
136
            vp9_cost_tokens((int *)c[t][i][j][1][k][l], probs, vp9_coef_tree);
137
#else
138
            vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
139
                                 vp9_coef_tree);
140
141
            assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
142
#endif
143
          }
144
145
}

146
147
148
149
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
150

151
// 3* dc_qlookup[Q]*dc_qlookup[Q];
152

153
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
154
155
156
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

157
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
158
159
160
161
162
163
164
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
165
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
166
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
167
  }
Paul Wilkins's avatar
Paul Wilkins committed
168
}
John Koleszar's avatar
John Koleszar committed
169

170
// Returns the base rate-distortion multiplier for quantizer index `qindex`:
// 11/4 times the square of vp9_dc_quant(qindex, 0), using integer arithmetic
// (the division by 4 is a right shift, so the result is truncated).
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
175
176
177
// Copies the per-quantizer SAD-per-bit values for `qindex` from the lookup
// tables into cpi->mb. `qindex` is used directly as a table index, so it
// must lie in [0, QINDEX_RANGE).
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

180

Dmitry Kovalev's avatar
Dmitry Kovalev committed
181
// Initializes the rate-distortion constants and cost tables in `cpi` for
// the quantizer index `qindex` (clamped to [0, MAXQ]):
//   - cpi->RDMULT / cpi->RDDIV and cpi->mb.errorperbit;
//   - per block size and mode: rd_threshes, rd_baseline_thresh and
//     rd_thresh_freq_fact;
//   - token, partition, mode and (for non-key frames) motion vector and
//     inter mode cost tables.
// Also calls vp9_set_speed_features(cpi).
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i, bsize;
  int thresh_div;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // rd_iifactor[] has 32 entries; ratios above 31 use the last one.
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  // Large multipliers are rescaled by 100 (RDDIV becomes 1 instead of 100),
  // and the thresholds below are divided by the same extra factor of 100.
  // The factor of 4 relates to the scaling of rd_thresh_block_size_factor[].
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
    thresh_div = 4 * 100;
  } else {
    cpi->RDDIV = 100;
    thresh_div = 4;
  }

  for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
    for (i = 0; i < MAX_MODES; ++i) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[]
      const int thresh_max =
          INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

      // Saturate instead of overflowing the product below.
      if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
        cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q *
            rd_thresh_block_size_factor[bsize] / thresh_div;
      } else {
        cpi->rd_threshes[bsize][i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];

      if (cpi->sf.adaptive_rd_thresh)
        cpi->rd_thresh_freq_fact[bsize][i] = MAX_RD_THRESH_FREQ_FACT;
      else
        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
    }
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /* rough estimate for costing */
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);

    for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
      MB_PREDICTION_MODE m;

      for (m = NEARESTMV; m < MB_MODE_COUNT; m++)
        cpi->mb.inter_mode_cost[i][m - NEARESTMV] =
            cost_token(vp9_sb_mv_ref_tree,
                       cpi->common.fc.inter_mode_probs[i],
                       vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
    }
  }
}

Deb Mukherjee's avatar
Deb Mukherjee committed
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
// Maps a block width/height pair (in pixels) to the matching BlockSize
// enumerator. Unsupported combinations trip an assert; in builds where
// asserts are compiled out, -1 is returned instead.
static enum BlockSize get_block_size(int bw, int bh) {
  switch (bw) {
    case 4:
      if (bh == 4) return BLOCK_4X4;
      if (bh == 8) return BLOCK_4X8;
      break;
    case 8:
      if (bh == 4) return BLOCK_8X4;
      if (bh == 8) return BLOCK_8X8;
      if (bh == 16) return BLOCK_8X16;
      break;
    case 16:
      if (bh == 8) return BLOCK_16X8;
      if (bh == 16) return BLOCK_16X16;
      if (bh == 32) return BLOCK_16X32;
      break;
    case 32:
      if (bh == 16) return BLOCK_32X16;
      if (bh == 32) return BLOCK_32X32;
      if (bh == 64) return BLOCK_32X64;
      break;
    case 64:
      if (bh == 32) return BLOCK_64X32;
      if (bh == 64) return BLOCK_64X64;
      break;
    default:
      break;
  }

  // No enumerator exists for this width/height combination.
  assert(0);
  return -1;
}

// Returns the BlockSize enumerator for block `bsize` as seen in plane `pd`,
// using the plane's own block width and height.
static enum BlockSize get_plane_block_size(BLOCK_SIZE_TYPE bsize,
                                           struct macroblockd_plane *pd) {
  const int plane_bw = plane_block_width(bsize, pd);
  const int plane_bh = plane_block_height(bsize, pd);

  return get_block_size(plane_bw, plane_bh);
}

// Piecewise-linear lookup in a uniformly sampled table.
//
//   x        - abscissa; callers are expected to pass x >= 0.
//   ntab     - number of entries in `tab`.
//   inv_step - number of table entries per unit of x (1 / sample spacing).
//   tab      - table values, tab[i] being the sample at x = i / inv_step.
//
// Returns the value interpolated between the two neighbouring entries, or
// the last entry when x lies at or beyond the end of the table.
static double linear_interpolate(double x, int ntab, int inv_step,
                                 const double *tab) {
  const double y = x * inv_step;

  // Do the range check in floating point, before converting to int:
  // converting a double that does not fit in an int is undefined behaviour
  // and could otherwise yield a wild table index. The negated comparison
  // also routes NaN to the saturated result.
  if (!(y < ntab - 1)) {
    return tab[ntab - 1];
  } else {
    const int d = (int) y;
    const double a = y - d;
    return tab[d] * (1 - a) + tab[d + 1] * a;
  }
}

// Normalized rate model.
//
// Models the rate of a Laplacian source with a given variance when it is
// quantized by a uniform quantizer with a given step size. The closed form
// expression is:
//   Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x), x = qpstep / sqrt(variance), and H(x) is
// the binary entropy function.
//
// The expression is tabulated at 8 samples per unit of x and evaluated by
// linear interpolation. `x` must be non-negative.
static double model_rate_norm(double x) {
  static const double kRateTable[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Table entries per unit of x.
  static const int kSamplesPerUnit = 8;
  const int num_entries = (int)(sizeof(kRateTable) / sizeof(kRateTable[0]));

  assert(x >= 0.0);
  return linear_interpolate(x, num_entries, kSamplesPerUnit, kRateTable);
}

// Normalized distortion model.
//
// Models the normalized distortion of a Laplacian source with a given
// variance when it is quantized by a uniform quantizer with a given step
// size. The closed form expression is:
//   Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
//
// The expression is tabulated at 8 samples per unit of x and evaluated by
// linear interpolation. `x` must be non-negative.
static double model_dist_norm(double x) {
  static const double kDistTable[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };
  // Table entries per unit of x.
  static const int kSamplesPerUnit = 8;
  const int num_entries = (int)(sizeof(kDistTable) / sizeof(kDistTable[0]));

  assert(x >= 0.0);
  return linear_interpolate(x, num_entries, kSamplesPerUnit, kDistTable);
}

// Models the rate and distortion of a Laplacian source with a given
// variance when quantized with a uniform quantizer of a given step size.
// The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
//   var   - summed squared error over the block.
//   n     - number of samples in the block.
//   qstep - quantizer step size.
//   rate  - out: n * normalized rate * 256, rounded to nearest.
//   dist  - out: n * normalized distortion * (var / n), rounded to nearest.
//
// Both outputs are zero when `var` or `n` is zero.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();
  if (var != 0 && n != 0) {
    const double per_sample_var = (double) var / n;
    const double norm_step = qstep / sqrt(per_sample_var);
    double norm_dist = model_dist_norm(norm_step);
    double norm_rate = model_rate_norm(norm_step);

    if (norm_rate < 0) {
      norm_rate = 0;
      norm_dist = var;
    }
    *rate = (int)(n * norm_rate * 256 + 0.5);
    *dist = (int64_t)(n * norm_dist * per_sample_var + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }
  vp9_clear_system_state();
}

// Estimates the rate and distortion of block `bsize` over all planes by
// feeding each plane's prediction SSE into the Laplacian model.
//
//   out_rate_sum - out: sum of the modelled rates of all planes.
//   out_dist_sum - out: sum of the modelled distortions, scaled by 16.
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0;
  // Accumulate in 64 bits: the per-plane distortion is an int64_t and the
  // final << 4 would overflow a 32-bit accumulator for large blocks.
  int64_t dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];

    // TODO(dkovalev) the same code in get_plane_block_size
    const int bw = plane_block_width(bsize, pd);
    const int bh = plane_block_height(bsize, pd);
    const enum BlockSize bs = get_block_size(bw, bh);
    unsigned int sse;
    int rate;
    int64_t dist;
    // Only the SSE output is needed; the returned variance is discarded.
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
    model_rd_from_var_lapndz(sse, bw * bh, pd->dequant[1] >> 3, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
// Luma-only counterpart of the whole-block model: estimates the rate and
// distortion of plane 0 of block `bsize` from its prediction SSE.
//
//   out_rate_sum - out: modelled rate.
//   out_dist_sum - out: modelled distortion, scaled by 16.
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                              MACROBLOCK *x, MACROBLOCKD *xd,
                              int *out_rate_sum, int64_t *out_dist_sum) {
  struct macroblock_plane *const src_plane = &x->plane[0];
  struct macroblockd_plane *const dst_plane = &xd->plane[0];

  // TODO(dkovalev) the same code in get_plane_block_size
  const int width = plane_block_width(bsize, dst_plane);
  const int height = plane_block_height(bsize, dst_plane);
  const enum BlockSize fn_idx = get_block_size(width, height);
  unsigned int sse;
  int model_rate;
  int64_t model_dist;

  // Only the SSE output is needed; the returned variance is discarded.
  (void) cpi->fn_ptr[fn_idx].vf(src_plane->src.buf, src_plane->src.stride,
                                dst_plane->dst.buf, dst_plane->dst.stride,
                                &sse);

  // Our transform coeffs are 8 times an orthogonal transform, so the
  // quantizer step is too; divide by 8 to get the effective step.
  // sse works better than var, since there is no dc prediction used.
  model_rd_from_var_lapndz(sse, width * height, dst_plane->dequant[1] >> 3,
                           &model_rate, &model_dist);

  *out_rate_sum = model_rate;
  *out_dist_sum = model_dist << 4;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
501
502
503
504
505
// Estimates the luma rate and distortion of block `bsize` when coded with
// transform size `tx_size`, by running the Laplacian model on the SSE of
// every transform-sized sub-block of plane 0.
//
//   out_rate_sum - out: sum of the modelled rates.
//   out_dist_sum - out: sum of the modelled distortions, scaled by 16.
//   out_skip     - out: 1 if every sub-block's modelled rate is below 1024,
//                  otherwise 0.
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
  int t = 4, j, k;
  enum BlockSize bs = BLOCK_4X4;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int bw = plane_block_width(bsize, pd);
  const int bh = plane_block_height(bsize, pd);
  int rate_sum = 0;
  int64_t dist_sum = 0;

  // Pick the variance function and the side length (in pixels) that match
  // the transform size.
  switch (tx_size) {
    case TX_4X4:
      bs = BLOCK_4X4;
      t = 4;
      break;
    case TX_8X8:
      bs = BLOCK_8X8;
      t = 8;
      break;
    case TX_16X16:
      bs = BLOCK_16X16;
      t = 16;
      break;
    case TX_32X32:
      bs = BLOCK_32X32;
      t = 32;
      break;
    default:
      assert(0);
      break;
  }
  assert(bs <= get_block_size(bw, bh));

  *out_skip = 1;
  for (j = 0; j < bh; j += t) {
    for (k = 0; k < bw; k += t) {
      int rate;
      int64_t dist;
      unsigned int sse;
      // Only the SSE output is needed; the returned variance is discarded.
      (void) cpi->fn_ptr[bs].vf(p->src.buf + j * p->src.stride + k,
                                p->src.stride,
                                pd->dst.buf + j * pd->dst.stride + k,
                                pd->dst.stride, &sse);
      // sse works better than var, since there is no dc prediction used
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
                               &rate, &dist);
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
  *out_rate_sum = rate_sum;
  *out_dist_sum = (dist_sum << 4);
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
553
// Computes the squared error between original and dequantized coefficients.
//
//   coeff      - original transform coefficients.
//   dqcoeff    - dequantized coefficients.
//   block_size - number of coefficients to process.
//   ssz        - out: sum of squares of `coeff`.
//
// Returns the sum of squared differences between `coeff` and `dqcoeff`.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    // The squares are formed in unsigned arithmetic: the difference of two
    // int16_t values squared can exceed INT_MAX but always fits in 32 bits.
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned) coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

568
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
569
                              int plane, int block, PLANE_TYPE type,
570
571
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
572
573
                              TX_SIZE tx_size,
                              int y_blocks) {
574
  MACROBLOCKD *const xd = &mb->e_mbd;
575
576
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
577
  int c = 0;
578
  int cost = 0;
579
  const int16_t *scan = NULL, *nb;
580
  const int eob = xd->plane[plane].eobs[block];
581
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
582
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
583
584
  unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
585
  ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
586
  TX_TYPE tx_type = DCT_DCT;
587
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
588
  int seg_eob = 0;
589
  uint8_t token_cache[1024];
590
  const uint8_t *band_translate = NULL;
591
592

  // Check for consistency of tx_size with mode info
593
  assert((!type && !plane) || (type && plane));
594
595
596
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
597
    assert(tx_size == get_uv_tx_size(mbmi));
598
599
  }

600
  switch (tx_size) {
601
    case TX_4X4: {
602
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
603
          get_tx_type_4x4(xd, block) : DCT_DCT;
604
605
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
606
      seg_eob = 16;
607
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
608
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
609
      break;
610
    }
611
    case TX_8X8: {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
612
613
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
                                  get_tx_type_8x8(xd) : DCT_DCT;
614
615
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
616
      scan = get_scan_8x8(tx_type);
617
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
618
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
619
      break;
620
621
    }
    case TX_16X16: {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
622
623
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
                                  get_tx_type_16x16(xd) : DCT_DCT;
624
      scan = get_scan_16x16(tx_type);
625
      seg_eob = 256;
626
627
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
628
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
629
      break;
630
    }
631
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
632
      scan = vp9_default_scan_32x32;
633
      seg_eob = 1024;
634
635
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
636
      band_translate = vp9_coefband_trans_8x8plus;
637
      break;
Daniel Kang's avatar
Daniel Kang committed
638
    default:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
639
      assert(0);
Daniel Kang's avatar
Daniel Kang committed
640
641
      break;
  }
John Koleszar's avatar
John Koleszar committed
642
  assert(eob <= seg_eob);
643

644
  pt = combine_entropy_contexts(above_ec, left_ec);
645
  nb = vp9_get_coef_neighbors_handle(scan);
646

647
  if (vp9_segfeature_active(&xd->seg, segment_id, SEG_LVL_SKIP))
648
    seg_eob = 0;
649

650
651
652
653
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

654
655
656
657
  if (eob == 0) {
    // single eob token
    cost += token_costs[0][0][pt][DCT_EOB_TOKEN];
  } else {
658
    int v, prev_t;
659
660
661

    // dc token
    v = qcoeff_ptr[0];
662
663
664
    prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost += token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
665
666
667
668

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
669
670
      const int band = get_coef_band(band_translate, c);
      int t;
671
672
673

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
674
      pt = get_coef_context(nb, token_cache, c);
675
      cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
676
      token_cache[rc] = vp9_pt_energy_class[t];
677
      prev_t = t;
678
    }
679
680

    // eob token
681
    if (c < seg_eob) {
682
      pt = get_coef_context(nb, token_cache, c);
683
684
      cost += token_costs[0][get_coef_band(band_translate, c)][pt]
                         [DCT_EOB_TOKEN];
685
    }
686
687
  }

688
689
690
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
691
  }
692

693
694
695
  return cost;
}

Deb Mukherjee's avatar
Deb Mukherjee committed
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
// State shared by the per-transform-block callbacks (dist_block and
// rate_block) while accumulating rate and distortion over a plane.
// NOTE: the field order is relied upon by positional initializers; do not
// reorder.
struct rdcost_block_args {
  VP9_COMMON *cm;               // passed through to cost_coeffs()
  MACROBLOCK *x;                // macroblock being evaluated
  ENTROPY_CONTEXT t_above[16];  // working copy of the above entropy contexts
  ENTROPY_CONTEXT t_left[16];   // working copy of the left entropy contexts
  TX_SIZE tx_size;              // transform size used for every block
  int bw;                       // plane block width  (1 << log2 width)
  int bh;                       // plane block height (1 << log2 height)
  int rate;                     // accumulated by rate_block()
  int64_t dist;                 // accumulated by dist_block()
  int64_t sse;                  // accumulated by dist_block()
  int64_t best_rd;              // NOTE(review): presumably an rd bound for
                                // early exit; not read in the visible code
  int skip;                     // NOTE(review): not read in the visible code
};

// Per-transform-block callback that adds the block's coefficient-domain
// distortion and source energy to the running totals in `arg`.
//
//   plane         - plane index of the block.
//   block         - block index within the plane.
//   bsize         - unused.
//   ss_txfrm_size - log2 of the number of 4x4 units in the transform block;
//                   the block has (16 << ss_txfrm_size) coefficients.
//   arg           - struct rdcost_block_args *; `dist` and `sse` are updated.
static void dist_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args* args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  // Read the coefficients of the plane this callback was invoked for, as
  // rate_block() does, rather than always reading plane 0.
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  // 32x32 transform results are not shifted down; all other sizes are
  // divided by 4.
  int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block, 16);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16);

  (void) bsize;

  args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                &this_sse) >> shift;
  args->sse += this_sse >> shift;
}

// Per-transform-block callback that adds the block's coefficient token cost
// to `rate` in the struct rdcost_block_args passed through `arg`. The
// working entropy contexts in that struct are updated by cost_coeffs().
static void rate_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                       int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd = arg;
  MACROBLOCKD *const mbd = &rd->x->e_mbd;
  ENTROPY_CONTEXT *above;
  ENTROPY_CONTEXT *left;
  int col, row;

  // Locate the block inside the plane to find its context entries.
  txfrm_block_to_raster_xy(mbd, bsize, plane, block, rd->tx_size * 2, &col,
                           &row);
  above = rd->t_above + col;
  left = rd->t_left + row;

  rd->rate += cost_coeffs(rd->cm, rd->x, plane, block,
                          mbd->plane[plane].plane_type, above, left,
                          rd->tx_size, rd->bw * rd->bh);
}

742
743
// FIXME(jingning): Make the RD test of the chroma components consistent with
// that of the luma component; this function should be deprecated afterwards.
Deb Mukherjee's avatar
Deb Mukherjee committed
744
745
746
747
748
749
750
// Returns the summed coefficient rate of every transform block in one plane
// of the block, for transform size tx_size. rate_block() is run on scratch
// copies of the plane's above/left entropy contexts.
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int bw = 1 << (b_width_log2(bsize) - pd->subsampling_x);
  const int bh = 1 << (b_height_log2(bsize) - pd->subsampling_y);
  // No early termination here: best_rd is left at INT64_MAX.
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
                                    0, 0, 0, INT64_MAX, 0 };

  vpx_memcpy(args.t_above, pd->above_context, bw * sizeof(ENTROPY_CONTEXT));
  vpx_memcpy(args.t_left, pd->left_context, bh * sizeof(ENTROPY_CONTEXT));

  foreach_transformed_block_in_plane(xd, bsize, plane, rate_block, &args);
  return args.rate;
}

// Returns the sum of rdcost_plane() over every plane after plane 0.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }
  return total;
}

// Sum of squared differences between the first block_size entries of coeff
// and dqcoeff, right-shifted by `shift` and saturated at INT_MAX.
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int64_t sum = 0;
  int k = 0;

  while (k < block_size) {
    // Two int16_t values differ by at most 65535; squaring in 64 bits keeps
    // the product exact.
    const int64_t delta = coeff[k] - dqcoeff[k];
    sum += delta * delta;
    ++k;
  }
  sum >>= shift;

  if (sum > INT_MAX)
    return INT_MAX;
  return (int)sum;
}

// Runs vp9_block_error() over the plane-0 coefficient buffers of the block
// (plane_block_width * plane_block_height entries).
//
//   x      macroblock whose plane-0 coeff / dqcoeff buffers are compared
//   bsize  block size used to derive the number of coefficients
//   shift  right shift applied to both results
//   sse    receives the shifted value vp9_block_error() reports through its
//          last argument
//
// Returns the shifted error, saturated at INT_MAX.
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                           int shift, int64_t *sse) {
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
  const int bw = plane_block_width(bsize, pd);
  const int bh = plane_block_height(bsize, pd);
  const int64_t err = vp9_block_error(x->plane[0].coeff, pd->dqcoeff,
                                      bw * bh, sse) >> shift;

  *sse >>= shift;
  // The error is accumulated in 64 bits but returned as int; saturate rather
  // than rely on an out-of-range narrowing conversion, as block_error() does.
  return err > INT_MAX ? INT_MAX : (int)err;
}

// Runs vp9_block_error() over every plane after plane 0 and adds up the
// results. Both totals are right-shifted by `shift` once, after summation:
// the error total is returned and the other total is stored in *sse.
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                                int shift, int64_t *sse) {
  int64_t total_err = 0;
  int64_t total_sse = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
    const int n_coeffs = plane_block_width(bsize, pd) *
                         plane_block_height(bsize, pd);
    int64_t plane_sse;

    total_err += vp9_block_error(x->plane[plane].coeff, pd->dqcoeff,
                                 n_coeffs, &plane_sse);
    total_sse += plane_sse;
  }

  *sse = total_sse >> shift;
  return total_err >> shift;
}

// Per-transform-block callback: encodes one block and adds its distortion and
// rate to the rdcost_block_args passed as `arg`. Once the accumulated RD cost
// exceeds args->best_rd the totals are saturated, args->skip is set, and all
// later invocations return immediately.
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const rd_args = arg;
  MACROBLOCK *const mb = rd_args->x;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  struct encode_b_args enc_args = {rd_args->cm, mb, NULL};

  // An earlier block already exceeded the budget.
  if (rd_args->skip) {
    return;
  }

  // Give up as soon as the cost accumulated so far is worse than the best
  // known alternative.
  if (RDCOST(mb->rdmult, mb->rddiv, rd_args->rate, rd_args->dist) >
      rd_args->best_rd) {
    rd_args->rate = INT_MAX;
    rd_args->dist = INT64_MAX;
    rd_args->sse  = INT64_MAX;
    rd_args->skip = 1;
    return;
  }

  if (mbd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME) {
    encode_block_intra(plane, block, bsize, ss_txfrm_size, &enc_args);
  } else {
    xform_quant(plane, block, bsize, ss_txfrm_size, &enc_args);
  }

  dist_block(plane, block, bsize, ss_txfrm_size, rd_args);
  rate_block(plane, block, bsize, ss_txfrm_size, rd_args);
}

static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skippable, int64_t *sse,
844
                                     int64_t ref_best_rd,
Deb Mukherjee's avatar
Deb Mukherjee committed
845
846
847
848
849
850
851
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int bwl = b_width_log2(bsize) - xd->plane[0].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[0].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh,
852
                                    0, 0, 0, ref_best_rd, 0 };
Deb Mukherjee's avatar
Deb Mukherjee committed
853
854
855
856
857
858
859
860
  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vpx_memcpy(&args.t_above, pd->above_context, sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(&args.t_left, pd->left_context, sizeof(ENTROPY_CONTEXT) * bh);

  foreach_transformed_block_in_plane(xd, bsize, 0, block_yrd_txfm, &args);
  *distortion = args.dist;
  *rate       = args.rate;
  *sse        = args.sse;
861
  *skippable  = vp9_sby_is_skippable(xd, bsize) && (!args.skip);
Deb Mukherjee's avatar
Deb Mukherjee committed
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
}

// Selects the largest transform size permitted by both the block size and
// cm->txfm_mode, stores it in mbmi->txfm_size, and evaluates the block with
// it. Only sse[<chosen size>] is written; no RD limit is applied.
static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
                                     BLOCK_SIZE_TYPE bs) {
  // One step down from 32x32 for each block-size threshold the block misses.
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
  VP9_COMMON *const cm = &cpi->common;
  MB_MODE_INFO *const mbmi = &x->e_mbd.mode_info_context->mbmi;
  const int mode_allows_32 = cm->txfm_mode == ALLOW_32X32 ||
                             cm->txfm_mode == TX_MODE_SELECT;
  const int mode_allows_16 = mode_allows_32 || cm->txfm_mode == ALLOW_16X16;
  TX_SIZE chosen;

  if (max_txfm_size == TX_32X32 && mode_allows_32) {
    chosen = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 && mode_allows_16) {
    chosen = TX_16X16;
  } else if (cm->txfm_mode != ONLY_4X4) {
    chosen = TX_8X8;
  } else {
    chosen = TX_4X4;
  }
  mbmi->txfm_size = chosen;

  super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
                           &sse[chosen], INT64_MAX, bs, chosen);
  cpi->txfm_stepdown_count[0]++;
}

893
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
894
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
895
                                     int64_t *d, int64_t *distortion,
896
897
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
Deb Mukherjee's avatar
Deb Mukherjee committed
898
899
900
                                     BLOCK_SIZE_TYPE bs) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
901
902
903
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
904
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
905
906
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
907
  int s0, s1;
908

909
  const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(cm, xd);
910

911
912
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
913
914
    if (r[n][0] == INT_MAX)
      continue;
915
916
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
917
        r[n][1] += vp9_cost_zero(tx_probs[m]);
918
      else
919
        r[n][1] += vp9_cost_one(tx_probs[m]);
920
921
    }
  }
922

923
924
925
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
926

927
  for (n = TX_4X4; n <= max_txfm_size; n++) {
928
929
930
931
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
932
933
934
935
936
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
937
938
939
    }
  }

940
941
942
943
944
945
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
946
947
948
949
950
951
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
952
    mbmi->txfm_size = TX_16X16;
953
  } else if (cm->txfm_mode == ALLOW_8X8 ||
954
955
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
956
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
957
958
959
960
961
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

962
  *distortion = d[mbmi->txfm_size];
963
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
964
965
  *skip       = s[mbmi->txfm_size];

966
967
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
968
969
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
970
971
972
973
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
974
975
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
976
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
977
  else
978
979
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
980

Deb Mukherjee's avatar
Deb Mukherjee committed
981
982
983
984
985
986
987
988
989
990
991
992
993
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[0]++;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++;
  } else {
    cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++;
994
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
995
}
996

Deb Mukherjee's avatar
Deb Mukherjee committed
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
// Chooses the transform size for the block from per-size rate / distortion
// values, after weighting each size's RD cost by a fixed per-size factor.
//
//   r[n][0], d[n], s[n]  rate, distortion and skip flag for transform size n
//   r[n][1]              written here: r[n][0] plus the vp9_cost_zero /
//                        vp9_cost_one terms taken from the tx-size
//                        probabilities
//   model_used[n]        nonzero when the values for size n came from a
//                        model; if the chosen size is such a size, the block
//                        is actually encoded with it and rate / distortion /
//                        skip / sse[n] come from that encode
//
// Also stores the choice in mbmi->txfm_size and bumps one entry of
// cpi->txfm_stepdown_count.
static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
                                          int (*r)[2], int *rate,
                                          int64_t *d, int64_t *distortion,
                                          int *s, int *skip, int64_t *sse,
                                          BLOCK_SIZE_TYPE bs,
                                          int *model_used) {
  const TX_SIZE max_txfm_size = TX_32X32
      - (bs < BLOCK_SIZE_SB32X32) - (bs < BLOCK_SIZE_MB16X16);
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
  const vp9_prob *tx_probs = vp9_get_pred_probs_tx_size(cm, xd);
  const int tx_select = cm->txfm_mode == TX_MODE_SELECT;
  // Per-size weights applied to the RD cost. A matching rate weighting
  // ({2.82, 2.00, 1.41, 1.00}) exists in the history of this code but is
  // disabled.
  const double scale_rd[TX_SIZE_MAX_SB] = {1.73, 1.44, 1.20, 1.00};
  int64_t rd[TX_SIZE_MAX_SB][2];
  TX_SIZE chosen, rd_best;
  int win_32, win_16;
  int s0, s1;
  int n, m;

  // Column 1 of r[] is column 0 plus the tx-size probability costs.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    const int last = n - (n == max_txfm_size);
    r[n][1] = r[n][0];
    for (m = 0; m <= last; m++) {
      r[n][1] += (m == n) ? vp9_cost_zero(tx_probs[m])
                          : vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  // Weighted RD cost per size: column 0 without, column 1 with the tx-size
  // cost.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
    rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
    rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
  }

  // Is a size at least as good as every smaller size? Sizes above
  // max_txfm_size have no rd[] entry, hence the guards.
  win_32 = max_txfm_size == TX_32X32 &&
           rd[TX_32X32][1] <= rd[TX_16X16][1] &&
           rd[TX_32X32][1] <= rd[TX_8X8][1] &&
           rd[TX_32X32][1] <= rd[TX_4X4][1];
  win_16 = max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] <= rd[TX_8X8][1] &&
           rd[TX_16X16][1] <= rd[TX_4X4][1];

  // Fixed modes force their size (capped by the block); TX_MODE_SELECT takes
  // the RD winner.
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 || (tx_select && win_32))) {
    chosen = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (tx_select && win_16))) {
    chosen = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
             (tx_select && rd[TX_8X8][1] <= rd[TX_4X4][1])) {
    chosen = TX_8X8;
  } else {
    chosen = TX_4X4;
  }
  mbmi->txfm_size = chosen;

  if (model_used[chosen]) {
    // The figures for this size came from a model, so encode for real with
    // the chosen size; the r / d tables themselves are left untouched.
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip,
                             &sse[chosen], INT64_MAX, bs, chosen);
  } else {
    *distortion = d[chosen];
    *rate       = r[chosen][tx_select];
    *skip       = s[chosen];
  }

  // RD winner regardless of the frame's transform mode.
  if (win_32) {
    rd_best = TX_32X32;
  } else if (win_16) {
    rd_best = TX_16X16;
  } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
    rd_best = TX_8X8;
  } else {
    rd_best = TX_4X4;
  }

  // Count how many steps below the block's maximum the RD winner sits.
  cpi->txfm_stepdown_count[max_txfm_size - rd_best]++;
}

1099
static void super_block_yrd(VP9_COMP *cpi,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1100
                            MACROBLOCK *x, int *rate, int64_t *distortion,
1101
                            int *skip, int64_t *psse, BLOCK_SIZE_TYPE bs,
1102
1103
                            int64_t txfm_cache[NB_TXFM_MODES],
                            int64_t ref_best_rd) {
1104
  VP9_COMMON *const cm = &cpi->common;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1105
  int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB];
1106
  int64_t d[TX_SIZE_MAX_SB], sse[TX_SIZE_MAX_SB];
Jim Bankoski's avatar
Jim Bankoski committed
1107
1108
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1109

1110
  assert(bs == mbmi->sb_type);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1111
  if (mbmi->ref_frame[0] > INTRA_FRAME)
1112
    vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1113

Deb Mukherjee's avatar
Deb Mukherjee committed
1114
1115
1116
  if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
      (cpi->sf.tx_size_search_method != USE_FULL_RD &&
       mbmi->ref_frame[0] == INTRA_FRAME)) {
1117
    vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
Deb Mukherjee's avatar
Deb Mukherjee committed
1118
    choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse, bs);
1119
    if (psse)
Deb Mukherjee's avatar
Deb Mukherjee committed
1120
      *psse = sse[mbmi->txfm_size];