/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
15

16
#include "vp9/common/vp9_pragmas.h"
17 18 19 20 21
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
22 23 24 25 26
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
27 28 29 30 31 32
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
33
#include "vpx_mem/vpx_mem.h"
34 35 36 37 38
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
39
#include "./vp9_rtcd.h"
40
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
41
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
42

43 44
#define INVALID_MV 0x80008000

45 46 47
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

48 49 50
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0
51

Paul Wilkins's avatar
Paul Wilkins committed
52 53
#define MIN_EARLY_TERM_INDEX    3

54
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},

  {DC_PRED,   INTRA_FRAME,  NONE},

  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},

  {NEARMV,    LAST_FRAME,   NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},

  {TM_PRED,   INTRA_FRAME,  NONE},

  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},

  {ZEROMV,    LAST_FRAME,   NONE},
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {ZEROMV,    ALTREF_FRAME, NONE},
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},

  {H_PRED,    INTRA_FRAME,  NONE},
  {V_PRED,    INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D207_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
};

// Table of {first reference, second reference} pairs. A second reference of
// NONE marks an entry that uses a single reference.
const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  // Single inter reference
  {LAST_FRAME,   NONE},
  {GOLDEN_FRAME, NONE},
  {ALTREF_FRAME, NONE},
  // Two references
  {LAST_FRAME,   ALTREF_FRAME},
  {GOLDEN_FRAME, ALTREF_FRAME},
  // Intra
  {INTRA_FRAME,  NONE},
};

103 104 105 106
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
107
// Per-block-size scale applied to the rd thresholds in
// set_block_thresholds(), where the product is divided by 4.
// The table is only read, so it is declared const.
static const int rd_thresh_block_size_factor[BLOCK_SIZES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

Deb Mukherjee's avatar
Deb Mukherjee committed
110 111 112
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
#define RD_THRESH_POW      1.25
113
#define RD_MULT_EPB_RATIO  64
Deb Mukherjee's avatar
Deb Mukherjee committed
114

115 116
#define MV_COST_WEIGHT      108
#define MV_COST_WEIGHT_SUB  120
117

118
static void fill_token_costs(vp9_coeff_cost *c,
119
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
120
  int i, j, k, l;
121 122 123 124 125 126 127 128
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
129
            vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
130
                            vp9_coef_tree);
131
            vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
132
                                 vp9_coef_tree);
133 134
            assert(c[t][i][j][k][0][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][k][1][l][DCT_EOB_TOKEN]);
135
          }
136 137
}

138 139 140 141 142 143
// Boost factors, in units of 1/16, added to the rd multiplier in
// vp9_compute_rd_mult(). The table is indexed by cpi->twopass.next_iiratio,
// with values above 31 using entry 31.
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
144

145
// 3* dc_qlookup[Q]*dc_qlookup[Q];
146

147
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
148 149 150
// Per-qindex lookup tables. They are filled by vp9_init_me_luts() and read
// by vp9_initialize_me_consts() to set sadperbit16 / sadperbit4.
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

151
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
152 153 154 155 156 157 158
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
159
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
160
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
161
  }
Paul Wilkins's avatar
Paul Wilkins committed
162
}
John Koleszar's avatar
John Koleszar committed
163

164
// Returns the rd multiplier for 'qindex': 88 * q^2 / 25, where q is the DC
// quantizer value. In the second pass, on non-key frames, the result is
// boosted by rd_iifactor[next_iiratio] / 16 (the index saturates at 31).
int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  // TODO(debargha): Adjust the function below
  int rdmult = 88 * q * q / 25;

  if (cpi->pass != 2 || cpi->common.frame_type == KEY_FRAME)
    return rdmult;

  rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio > 31
                                      ? 31
                                      : cpi->twopass.next_iiratio]) >> 4;
  return rdmult;
}

// Maps 'qindex' to the rd threshold scale factor:
// (dc_quant / 4) ^ RD_THRESH_POW * 5.12, truncated to int and never
// smaller than 8.
static int compute_rd_thresh_factor(int qindex) {
  // TODO(debargha): Adjust the function below
  const int factor =
      (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);

  return factor < 8 ? 8 : factor;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
186 187 188
// Copies the SAD-per-bit values for 'qindex' from the lookup tables into
// the encoder's macroblock state.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const x = &cpi->mb;

  x->sadperbit16 = sad_per_bit16lut[qindex];
  x->sadperbit4 = sad_per_bit4lut[qindex];
}

191 192 193 194 195 196 197 198 199
// Fills cpi->rd_threshes[][][] and cpi->rd_thresh_sub8x8[][][] for every
// segment and block size. Each entry is
//   thresh_mult * q * rd_thresh_block_size_factor[bsize] / 4,
// where q comes from compute_rd_thresh_factor() on the segment's clamped
// qindex. Entries whose multiplier reaches the overflow guard are set to
// INT_MAX instead.
static void set_block_thresholds(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  int seg, bs, idx;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    int q;
    int seg_qindex = vp9_get_qindex(&cm->seg, seg, cm->base_qindex);

    seg_qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    q = compute_rd_thresh_factor(seg_qindex);

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      const int scale = q * rd_thresh_block_size_factor[bs];
      // The guard here seems unnecessarily harsh but is fine given the
      // actual range of values used for cpi->sf.thresh_mult[].
      const int thresh_max = INT_MAX / scale;

      for (idx = 0; idx < MAX_MODES; ++idx) {
        const int mult = cpi->sf.thresh_mult[idx];

        cpi->rd_threshes[seg][bs][idx] =
            mult < thresh_max ? mult * scale / 4 : INT_MAX;
      }

      for (idx = 0; idx < MAX_REFS; ++idx) {
        const int mult = cpi->sf.thresh_mult_sub8x8[idx];

        cpi->rd_thresh_sub8x8[seg][bs][idx] =
            mult < thresh_max ? mult * scale / 4 : INT_MAX;
      }
    }
  }
}

229 230 231
// Sets up the rate-distortion constants and cost tables on 'cpi':
// RDDIV / RDMULT, errorperbit, speed features, block thresholds, token and
// partition costs, mode costs and, for frames that are not intra-only,
// motion vector and inter mode costs.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->mb;
  int qindex, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);

  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
  cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);

  // errorperbit must never be zero.
  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
  if (x->errorperbit == 0)
    x->errorperbit = 1;

  vp9_set_speed_features(cpi);

  set_block_thresholds(cpi);

  fill_token_costs(x->token_costs, cm->fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) {
    vp9_cost_tokens(x->partition_cost[i],
                    cm->fc.partition_prob[cm->frame_type][i],
                    vp9_partition_tree);
  }

  /* rough estimate for costing */
  vp9_init_mode_costs(cpi);

  if (frame_is_intra_only(cm))
    return;

  vp9_build_nmv_cost_table(
      x->nmvjointcost,
      x->e_mbd.allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
      &cm->fc.nmvc,
      x->e_mbd.allow_high_precision_mv, 1, 1);

  for (i = 0; i < INTER_MODE_CONTEXTS; i++) {
    MB_PREDICTION_MODE m;

    for (m = NEARESTMV; m < MB_MODE_COUNT; m++) {
      x->inter_mode_cost[i][inter_mode_offset(m)] =
          cost_token(vp9_inter_mode_tree,
                     cm->fc.inter_mode_probs[i],
                     vp9_inter_mode_encodings + inter_mode_offset(m));
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
281
// Linearly interpolates two tables of 'ntab' entries at position
// x * inv_step and stores the results in *v1 and *v2. Positions at or
// beyond the last entry return the last entry of each table.
static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
                                       const double *tab1, const double *tab2,
                                       double *v1, double *v2) {
  const double pos = x * inv_step;
  const int idx = (int) pos;

  if (idx < ntab - 1) {
    const double frac = pos - idx;

    *v1 = tab1[idx] * (1 - frac) + tab1[idx + 1] * frac;
    *v2 = tab2[idx] * (1 - frac) + tab2[idx + 1] * frac;
    return;
  }

  *v1 = tab1[ntab - 1];
  *v2 = tab2[ntab - 1];
}

296 297 298 299 300
// Looks up the normalized rate (*R) and normalized distortion (*D) for a
// Laplacian source at x = qpstep / sqrt(variance), by linear interpolation
// in two tables sampled at steps of 1 / inv_tab_step. 'x' must be >= 0.
static void model_rd_norm(double x, double *R, double *D) {
  static const int inv_tab_step = 8;
  static const int tab_size = 120;
  // NOTE: The tables below must be of the same size
  //
  // Normalized rate
  // This table models the rate for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const double rate_tab[] = {
    64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
    2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
    1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
    0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
    0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
    0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
    0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
    0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
    0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
    0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
    0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
    0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
    0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
    0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
    0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
  };
  // Normalized distortion
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance)
  // Note the actual distortion is Dn * variance.
  static const double dist_tab[] = {
    0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
    0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
    0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
    0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
    0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
    0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
    0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
    0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
    0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
    0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
    0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
    0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
    0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
    0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
    0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
  };

  // Both tables must hold exactly tab_size entries, because the same index
  // and the same bound are used for both in linear_interpolate2().
  assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]));
  assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]));
  assert(sizeof(rate_tab) == sizeof(dist_tab));

  assert(x >= 0.0);
  linear_interpolate2(x, tab_size, inv_tab_step,
                      rate_tab, dist_tab, R, D);
}

// This function models the rate and distortion for a Laplacian
// source with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
//
// 'var' is the summed value over 'n' samples; *rate is n * 256 times the
// normalized rate and *dist is 'var' times the normalized distortion, both
// rounded to nearest. Both outputs are 0 when 'var' or 'n' is 0.
static void model_rd_from_var_lapndz(int var, int n, int qstep,
                                     int *rate, int64_t *dist) {
  vp9_clear_system_state();
  if (var != 0 && n != 0) {
    double norm_rate, norm_dist;
    const double per_sample_var = (double) var / n;
    const double step_ratio = qstep / sqrt(per_sample_var);

    model_rd_norm(step_ratio, &norm_rate, &norm_dist);
    *rate = (int)((n << 8) * norm_rate + 0.5);
    *dist = (int)(var * norm_dist + 0.5);
  } else {
    *rate = 0;
    *dist = 0;
  }
  vp9_clear_system_state();
}

382
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
383 384 385 386 387 388 389 390 391 392
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i, rate_sum = 0, dist_sum = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
393
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Deb Mukherjee's avatar
Deb Mukherjee committed
394 395 396 397 398 399
    unsigned int sse;
    int rate;
    int64_t dist;
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, &sse);
    // sse works better than var, since there is no dc prediction used
Dmitry Kovalev's avatar
Dmitry Kovalev committed
400
    model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
401
                             pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
402 403

    rate_sum += rate;
Yaowu Xu's avatar
Yaowu Xu committed
404
    dist_sum += (int)dist;
Deb Mukherjee's avatar
Deb Mukherjee committed
405 406 407 408 409 410
  }

  *out_rate_sum = rate_sum;
  *out_dist_sum = dist_sum << 4;
}

411
static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
412 413 414 415
                                 TX_SIZE tx_size,
                                 MACROBLOCK *x, MACROBLOCKD *xd,
                                 int *out_rate_sum, int64_t *out_dist_sum,
                                 int *out_skip) {
416
  int j, k;
417
  BLOCK_SIZE bs;
Deb Mukherjee's avatar
Deb Mukherjee committed
418 419
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
420 421
  const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
  const int height = 4 << num_4x4_blocks_high_lookup[bsize];
Deb Mukherjee's avatar
Deb Mukherjee committed
422 423
  int rate_sum = 0;
  int64_t dist_sum = 0;
424
  const int t = 4 << tx_size;
Deb Mukherjee's avatar
Deb Mukherjee committed
425 426 427 428 429 430 431 432 433 434 435 436

  if (tx_size == TX_4X4) {
    bs = BLOCK_4X4;
  } else if (tx_size == TX_8X8) {
    bs = BLOCK_8X8;
  } else if (tx_size == TX_16X16) {
    bs = BLOCK_16X16;
  } else if (tx_size == TX_32X32) {
    bs = BLOCK_32X32;
  } else {
    assert(0);
  }
437

Deb Mukherjee's avatar
Deb Mukherjee committed
438
  *out_skip = 1;
Jim Bankoski's avatar
Jim Bankoski committed
439 440
  for (j = 0; j < height; j += t) {
    for (k = 0; k < width; k += t) {
Deb Mukherjee's avatar
Deb Mukherjee committed
441 442 443
      int rate;
      int64_t dist;
      unsigned int sse;
444 445 446
      cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
                         &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                         &sse);
Deb Mukherjee's avatar
Deb Mukherjee committed
447
      // sse works better than var, since there is no dc prediction used
448
      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
Deb Mukherjee's avatar
Deb Mukherjee committed
449 450 451 452 453
      rate_sum += rate;
      dist_sum += dist;
      *out_skip &= (rate < 1024);
    }
  }
454

Deb Mukherjee's avatar
Deb Mukherjee committed
455
  *out_rate_sum = rate_sum;
456
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
457 458
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
459
// Returns the sum of squared differences between 'coeff' and 'dqcoeff'
// over 'block_size' entries, and stores the sum of squared 'coeff' values
// in *ssz. A 'block_size' of zero or less yields 0 for both.
//
// The index has the same type as 'block_size' so it cannot overflow before
// reaching the bound, and the squares are formed in 64-bit arithmetic
// instead of relying on unsigned wraparound.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t value = coeff[i];
    const int64_t this_diff = value - dqcoeff[i];

    error += this_diff * this_diff;
    sqcoeff += value * value;
  }

  *ssz = sqcoeff;
  return error;
}

474 475 476 477 478
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
479
// Number of coefficients in each band, one row per transform size. The last
// non-zero entry of a row is written as (total coefficients - coefficients
// in the earlier bands).
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },  // 16 coefficients
  { 1, 2, 3, 4, 11,   64 - 21, 0 },  // 64 coefficients
  { 1, 2, 3, 4, 11,  256 - 21, 0 },  // 256 coefficients
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },  // 1024 coefficients
};

486
static INLINE int cost_coeffs(MACROBLOCK *x,
487
                              int plane, int block,
488
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
489
                              TX_SIZE tx_size,
490
                              const int16_t *scan, const int16_t *nb) {
491
  MACROBLOCKD *const xd = &x->e_mbd;
492
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
493 494
  struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
495
  const int16_t *band_count = &band_counts[tx_size][1];
496 497
  const int eob = pd->eobs[block];
  const int16_t *const qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
498
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
499
  unsigned int (*token_costs)[2][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
500
                   x->token_costs[tx_size][type][ref];
501
  const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
502
  uint8_t *p_tok = x->token_cache;
503 504
  int pt = combine_entropy_contexts(above_ec, left_ec);
  int c, cost;
505 506

  // Check for consistency of tx_size with mode info
507
  assert(type == PLANE_TYPE_Y_WITH_DC ? mbmi->tx_size == tx_size
508
                                      : get_uv_tx_size(mbmi) == tx_size);
509

510 511
  if (eob == 0) {
    // single eob token
512 513
    cost = token_costs[0][0][pt][DCT_EOB_TOKEN];
    c = 0;
514
  } else {
515
    int band_left = *band_count++;
516 517

    // dc token
518 519
    int v = qcoeff_ptr[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
520
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
521
    p_tok[0] = vp9_pt_energy_class[prev_t];
522
    ++token_costs;
523 524 525 526

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
527
      int t;
528 529 530

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
531
      pt = get_coef_context(nb, p_tok, c);
532
      cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
533
      p_tok[rc] = vp9_pt_energy_class[t];
534
      prev_t = t;
535
      if (!--band_left) {
536 537
        band_left = *band_count++;
        ++token_costs;
538
      }
539
    }
540 541

    // eob token
542
    if (band_left) {
543
      pt = get_coef_context(nb, p_tok, c);
544
      cost += (*token_costs)[0][pt][DCT_EOB_TOKEN];
545
    }
546 547
  }

548
  // is eob first coefficient;
549
  *A = *L = (c > 0);
550

551 552 553
  return cost;
}

554
// Computes the distortion and sse of one transform block from its
// coefficients and dequantized coefficients, and stores them in args->dist
// and args->sse. Both are shifted right by 2 unless args->tx_size is
// TX_32X32. When x->skip_encode is set and the block is intra, a term
// derived from the squared dequant[1] value is added to both.
static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_txfrm_size = tx_size << 1;
  const int shift = args->tx_size == TX_32X32 ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int64_t this_sse;

  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
  args->sse  = this_sse >> shift;

  if (x->skip_encode &&
      xd->mi_8x8[0]->mbmi.ref_frame[0] == INTRA_FRAME) {
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t extra = (pd->dequant[1] * pd->dequant[1] *
                        (1 << ss_txfrm_size)) >> (shift + 2);

    args->dist += (extra >> 4);
    args->sse  += extra;
  }
}

579
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
580
                       TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
581
  struct rdcost_block_args* args = arg;
582

Deb Mukherjee's avatar
Deb Mukherjee committed
583
  int x_idx, y_idx;
584
  txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
585

586 587 588
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
                           args->t_left + y_idx, args->tx_size,
                           args->scan, args->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
589 590
}

591
static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
592
                           TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
593 594 595
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
596
  struct encode_b_args encode_args = {x, NULL};
597
  int64_t rd1, rd2, rd;
Deb Mukherjee's avatar
Deb Mukherjee committed
598

599 600 601
  if (args->skip)
    return;

602
  if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
603
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
Deb Mukherjee's avatar
Deb Mukherjee committed
604
  else
605
    vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
Deb Mukherjee's avatar
Deb Mukherjee committed
606

607 608
  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);
609 610
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
611 612

  // TODO(jingning): temporarily enabled only for luma component
613
  rd = MIN(rd1, rd2);
614 615 616
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = rd1 > rd2;

617 618 619
  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
620 621 622 623 624 625
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
626 627
}

628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660
void vp9_get_entropy_contexts(TX_SIZE tx_size,
    ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
    const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
    int num_4x4_w, int num_4x4_h) {
  int i;
  switch (tx_size) {
    case TX_4X4:
      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2)
        t_above[i] = !!*(const uint16_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 2)
        t_left[i] = !!*(const uint16_t *)&left[i];
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4)
        t_above[i] = !!*(const uint32_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 4)
        t_left[i] = !!*(const uint32_t *)&left[i];
      break;
    case TX_32X32:
      for (i = 0; i < num_4x4_w; i += 8)
        t_above[i] = !!*(const uint64_t *)&above[i];
      for (i = 0; i < num_4x4_h; i += 8)
        t_left[i] = !!*(const uint64_t *)&left[i];
      break;
    default:
      assert(!"Invalid transform size.");
  }
}

661 662 663 664 665 666 667 668 669 670 671 672
// Zeroes *arg and then records the macroblock, transform size, block
// dimensions (in 4x4 units) and the rd cost bound used for early exit.
static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
                              const int num_4x4_w, const int num_4x4_h,
                              const int64_t ref_rdcost,
                              struct rdcost_block_args *arg) {
  vpx_memset(arg, 0, sizeof(*arg));

  arg->x = x;
  arg->tx_size = tx_size;
  arg->best_rd = ref_rdcost;
  arg->bw = num_4x4_w;
  arg->bh = num_4x4_h;
}

673
static void txfm_rd_in_plane(MACROBLOCK *x,
674
                             struct rdcost_block_args *rd_stack,
675 676 677
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
678
                             BLOCK_SIZE bsize, TX_SIZE tx_size) {
Deb Mukherjee's avatar
Deb Mukherjee committed
679
  MACROBLOCKD *const xd = &x->e_mbd;
680
  struct macroblockd_plane *const pd = &xd->plane[plane];
681
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
682 683 684
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bs];

685 686
  init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
                    ref_best_rd, rd_stack);
687
  if (plane == 0)
688
    xd->mi_8x8[0]->mbmi.tx_size = tx_size;
689

690
  vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
691 692
                           pd->above_context, pd->left_context,
                           num_4x4_w, num_4x4_h);
693

694
  get_scan(xd, tx_size, pd->plane_type, 0, &rd_stack->scan, &rd_stack->nb);
Deb Mukherjee's avatar
Deb Mukherjee committed
695

696 697 698
  foreach_transformed_block_in_plane(xd, bsize, plane,
                                     block_yrd_txfm, rd_stack);
  if (rd_stack->skip) {
699 700 701 702 703
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
704 705 706
    *distortion = rd_stack->this_dist;
    *rate       = rd_stack->this_rate;
    *sse        = rd_stack->this_sse;
707 708
    *skippable  = vp9_is_skippable_in_plane(xd, bsize, plane);
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
709 710 711 712 713
}

static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
                                     int *rate, int64_t *distortion,
                                     int *skip, int64_t *sse,
714
                                     int64_t ref_best_rd,
715
                                     BLOCK_SIZE bs) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
716
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Deb Mukherjee's avatar
Deb Mukherjee committed
717 718
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
719
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
720
  if (max_tx_size == TX_32X32 &&
721 722
      (cm->tx_mode == ALLOW_32X32 ||
       cm->tx_mode == TX_MODE_SELECT)) {
723
    mbmi->tx_size = TX_32X32;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
724
  } else if (max_tx_size >= TX_16X16 &&
725 726 727
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              cm->tx_mode == TX_MODE_SELECT)) {
728
    mbmi->tx_size = TX_16X16;
729
  } else if (cm->tx_mode != ONLY_4X4) {
730
    mbmi->tx_size = TX_8X8;
Deb Mukherjee's avatar
Deb Mukherjee committed
731
  } else {
732
    mbmi->tx_size = TX_4X4;
Deb Mukherjee's avatar
Deb Mukherjee committed
733
  }
734
  txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
735 736
                   &sse[mbmi->tx_size], ref_best_rd, 0, bs,
                   mbmi->tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
737
  cpi->tx_stepdown_count[0]++;
Deb Mukherjee's avatar
Deb Mukherjee committed
738 739
}

740
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
741
                                     int (*r)[2], int *rate,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
742
                                     int64_t *d, int64_t *distortion,
743
                                     int *s, int *skip,
744
                                     int64_t tx_cache[TX_MODES],
745
                                     BLOCK_SIZE bs) {
746
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
747 748
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
749
  MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
750
  vp9_prob skip_prob = vp9_get_pred_prob_mbskip(cm, xd);
751
  int64_t rd[TX_SIZES][2];
752
  int n, m;
753
  int s0, s1;
754

755
  const vp9_prob *tx_probs = get_tx_probs2(xd, &cm->fc.tx_probs, xd->mi_8x8[0]);
756

757
  for (n = TX_4X4; n <= max_tx_size; n++) {
758
    r[n][1] = r[n][0];
759 760
    if (r[n][0] == INT_MAX)
      continue;
761
    for (m = 0; m <= n - (n == max_tx_size); m++) {
762
      if (m == n)
763
        r[n][1] += vp9_cost_zero(tx_probs[m]);
764
      else
765
        r[n][1] += vp9_cost_one(tx_probs[m]);
766 767
    }
  }
768

769 770 771
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
772

773
  for (n = TX_4X4; n <= max_tx_size; n++) {
774 775 776 777
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
      continue;
    }
778 779 780 781 782
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
783 784 785
    }
  }

786
  if (max_tx_size == TX_32X32 &&
787 788
      (cm->tx_mode == ALLOW_32X32 ||
       (cm->tx_mode == TX_MODE_SELECT &&
789 790
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
791
    mbmi->tx_size = TX_32X32;
792
  } else if (max_tx_size >= TX_16X16 &&
793 794 795
             (cm->tx_mode == ALLOW_16X16 ||
              cm->tx_mode == ALLOW_32X32 ||
              (cm->tx_mode == TX_MODE_SELECT &&
796 797
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
798
    mbmi->tx_size = TX_16X16;
799 800 801 802
  } else if (cm->tx_mode == ALLOW_8X8 ||
             cm->tx_mode == ALLOW_16X16 ||
             cm->tx_mode == ALLOW_32X32 ||
           (cm->tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
803
    mbmi->tx_size = TX_8X8;
804
  } else {
805
    mbmi->tx_size = TX_4X4;
806 807
  }

808 809 810
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
811

812 813 814 815 816
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
  if (max_tx_size == TX_32X32 &&
817 818
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
819 820
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_tx_size >= TX_16X16 &&
821
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])