vp9_rdopt.c 160 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10 11
 */

#include <assert.h>
12
#include <math.h>
13

14
#include "./vp9_rtcd.h"
15
#include "./vpx_dsp_rtcd.h"
16 17

#include "vpx_mem/vpx_mem.h"
18
#include "vpx_ports/mem.h"
19 20 21

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
22
#include "vp9/common/vp9_entropymode.h"
23 24 25 26
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
27 28
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
Scott LaVarnway's avatar
Scott LaVarnway committed
29
#include "vp9/common/vp9_scan.h"
30 31 32
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_cost.h"
34
#include "vp9/encoder/vp9_encodemb.h"
35
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
36
#include "vp9/encoder/vp9_encoder.h"
37
#include "vp9/encoder/vp9_mcomp.h"
38
#include "vp9/encoder/vp9_quantize.h"
39
#include "vp9/encoder/vp9_ratectrl.h"
40
#include "vp9/encoder/vp9_rd.h"
41
#include "vp9/encoder/vp9_rdopt.h"
42
#include "vp9/encoder/vp9_aq_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44 45 46 47 48 49 50 51
// Reference-frame bit masks, one bit per MV_REFERENCE_FRAME value.  Each
// *_MODE_MASK has the bits of every frame type EXCEPT the one it is named
// after.
// NOTE(review): presumably used to disable modes that use the masked
// references; the use sites are outside this view.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// ALTREF_FRAME bit plus bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME -- confirm
// against the MV_REFERENCE_FRAME definition.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
52

Paul Wilkins's avatar
Paul Wilkins committed
53
// NOTE(review): not referenced in the visible part of this file; presumably
// the lowest mode index at which early termination of the mode search may
// trigger -- confirm at the use site.
#define MIN_EARLY_TERM_INDEX    3
54
// NOTE(review): not referenced in the visible part of this file; presumably
// a divisor applied to the cost of NEWMV vectors -- confirm at the use site.
#define NEW_MV_DISCOUNT_FACTOR  8
Paul Wilkins's avatar
Paul Wilkins committed
55

56
typedef struct {
57
  PREDICTION_MODE mode;
58 59 60 61 62 63 64
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference search order table (vp9_ref_order): a
// reference frame pair with no prediction mode attached.
typedef struct {
  // ref_frame[1] is NONE for the intra and single-reference entries of
  // vp9_ref_order.
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
65 66 67 68 69 70 71 72 73 74 75 76 77
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
78
  int use_fast_coef_costing;
79
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
80 81
};

82
#define LAST_NEW_MV_INDEX 6
83
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
84 85 86 87 88 89 90 91 92 93 94 95
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
96 97 98 99 100 101
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
123 124
};

125
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
126 127 128 129 130 131
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
132 133
};

134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
// Exchanges coefficient buffer slots m and n of ctx for planes
// [min_plane, max_plane).  On return the live plane pointers in x
// (coeff, qcoeff, dqcoeff, eobs) refer to the buffers that were in slot m on
// entry, slot m holds what was in slot n, and slot n holds what was in
// slot m.
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Each buffer kind is rotated independently, with the live pointer
    // acting as the temporary.
    mb_plane->coeff            = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m]  = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n]  = mb_plane->coeff;

    mb_plane->qcoeff           = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = mb_plane->qcoeff;

    mbd_plane->dqcoeff          = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = mbd_plane->dqcoeff;

    mb_plane->eobs             = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m]   = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n]   = mb_plane->eobs;
  }
}

159
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
160
                            MACROBLOCK *x, MACROBLOCKD *xd,
161 162
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
Deb Mukherjee's avatar
Deb Mukherjee committed
163 164 165
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
166 167 168
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
169
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
170
  unsigned int sse;
171
  unsigned int var = 0;
172
  unsigned int sum_sse = 0;
173 174
  int64_t total_sse = 0;
  int skip_flag = 1;
175
  const int shift = 6;
176 177 178 179
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
180 181 182 183

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
184
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
185 186
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
187 188 189 190 191 192
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
193 194 195 196 197 198
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

199
    sum_sse = 0;
200 201 202 203 204

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
205
        int block_idx = (idy << 1) + idx;
206
        int low_err_skip = 0;
207 208 209 210 211 212

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

213
        x->skip_txfm[(i << 2) + block_idx] = 0;
214
        if (!x->select_tx_size) {
215
          // Check if all ac coefficients can be quantized to zero.
216
          if (var < ac_thr || var == 0) {
217
            x->skip_txfm[(i << 2) + block_idx] = 2;
218 219

            // Check if dc coefficient can be quantized to zero.
220
            if (sse - var < dc_thr || sse == var) {
221
              x->skip_txfm[(i << 2) + block_idx] = 1;
222 223 224 225

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
226
          }
227
        }
228

229 230 231
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

232 233 234 235
        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
236

237 238
    total_sse += sum_sse;

239
    // Fast approximate the modelling function.
240
    if (cpi->oxcf.speed > 4) {
241
      int64_t rate;
242
      const int64_t square_error = sum_sse;
243
      int quantizer = (pd->dequant[1] >> 3);
244 245 246 247 248
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
249

250 251
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
252 253 254 255 256
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
257
    } else {
258 259
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
260
        vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
261 262 263
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
264
        vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
265 266 267
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
268
      vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
269
                                   pd->dequant[1] >> 3, &rate, &dist);
270
#endif  // CONFIG_VP9_HIGHBITDEPTH
271
      rate_sum += rate;
272
      dist_sum += dist;
273
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
274 275
  }

276 277
  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
278 279
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
280 281
}

282
// Reference C implementation of the coefficient-domain block error.
//
// Returns the sum over block_size coefficients of (coeff - dqcoeff)^2 and
// stores the sum of coeff^2 in *ssz.  All arithmetic is carried out in
// 64 bits so that neither the difference nor the squares can overflow a
// signed int, whatever the width of tran_low_t.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

297 298 299 300 301 302 303 304 305 306 307 308
// Reference C implementation of the coefficient-domain block error for
// 16-bit coefficients.
//
// Returns the sum over block_size coefficients of (coeff - dqcoeff)^2.
// A non-positive block_size yields 0.  The square is computed in 64 bits:
// the difference of two int16_t values can reach +/-65535, whose square does
// not fit in a 32-bit signed int.
int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
309 310

#if CONFIG_VP9_HIGHBITDEPTH
311 312 313 314
// High-bit-depth variant of the coefficient-domain block error.
//
// Accumulates (coeff - dqcoeff)^2 and coeff^2 over block_size coefficients,
// then scales both sums down by 2 * (bd - 8) bits with rounding.  The scaled
// squared-coefficient sum is stored in *ssz and the scaled error is
// returned.  The difference is widened to 64 bits before subtracting so it
// cannot overflow the coefficient type.
// NOTE(review): bd below 8 would make the shift count negative; callers in
// this view pass xd->bd or 8.
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error +=  diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

334 335 336 337 338
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
339
static const int16_t band_counts[TX_SIZES][8] = {
340 341 342 343
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
344
};
Alex Converse's avatar
Alex Converse committed
345 346 347 348 349 350
// Returns the token cost of the quantized coefficients of one transform
// block, read from x->token_costs.
//
//   x, plane, block - select the coefficient buffer and its end-of-block
//                     position (p->eobs[block])
//   A, L            - above / left entropy contexts for this block; read to
//                     form the initial context and both overwritten with
//                     1 if the block has at least one coded coefficient,
//                     else 0
//   tx_size         - transform size; must agree with the mode info
//   scan, nb        - scan order and its neighbor table
//   use_fast_coef_costing - when non-zero, the context of each AC token is
//                     approximated from the previous token instead of being
//                     derived from the neighbors via get_coef_context()
static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Start at band 1: band 0 holds only the DC coefficient, handled below.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  // Cost table for the current band; advanced as bands are exhausted.
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int16_t prev_t;
    EXTRABIT e;
    vp9_get_token_extra(v, &prev_t, &e);
    cost = (*token_costs)[0][pt][prev_t] +
        vp9_get_cost(prev_t, e, cat6_high_cost);

    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int16_t t;

      v = qcoeff[rc];
      vp9_get_token_extra(v, &t, &e);
      if (use_fast_coef_costing) {
        // token_cache is neither written nor read on this path.
        cost += (*token_costs)[!prev_t][!prev_t][t] +
            vp9_get_cost(t, e, cat6_high_cost);
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] +
            vp9_get_cost(t, e, cat6_high_cost);
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token: band_left is 0 only when the terminator of band_counts was
    // reached, i.e. the block is full and no EOB token is coded.
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // c is 0 only when the block is empty (eob == 0); record in both contexts
  // whether any coefficient was coded.
  *A = *L = (c > 0);

  return cost;
}
431 432 433 434 435

#if CONFIG_VP9_HIGHBITDEPTH
// Computes the coefficient-domain distortion and SSE of one transform block
// and stores them in args->dist and args->sse.
//
// The block error covers 16 << (2 * tx_size) coefficients; the results are
// shifted right by 2 for every transform size except TX_32X32.  When
// x->skip_encode is set and the block is intra, a term derived from the
// squared AC dequantizer is added to both outputs.
// In high-bit-depth builds the extra parameter bd is forwarded to
// vp9_highbd_block_error().
#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
#endif  // CONFIG_VP9_HIGHBITDEPTH
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
  args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
#else
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  args->sse  = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Widen before multiplying: dequant^2 * (1 << ss_txfrm_size) can exceed
    // the range of int for large dequantizer values.
    const int64_t dequant_ac = pd->dequant[1];
    int64_t quant_err = (dequant_ac * dequant_ac *
                         ((int64_t)1 << ss_txfrm_size)) >> (shift + 2);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      quant_err >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    args->dist += (quant_err >> 4);
    args->sse  += quant_err;
  }
}

471
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
472
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
473
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
474
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
475

476
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
477
                           args->t_left + y_idx, tx_size,
478 479
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
480 481
}

482 483
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
484 485 486
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
487
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
488
  int64_t rd1, rd2, rd;
Deb Mukherjee's avatar
Deb Mukherjee committed
489

490 491 492
  if (args->skip)
    return;

493
  if (!is_inter_block(mbmi)) {
494 495
    struct encode_b_args arg = {x, NULL, &mbmi->skip};
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
496 497 498 499 500 501 502
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
503
    dist_block(plane, block, tx_size, args);
504
#endif  // CONFIG_VP9_HIGHBITDEPTH
505 506
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
507 508
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
509 510 511 512 513 514 515
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dist_block(plane, block, tx_size, args, xd->bd);
      } else {
        dist_block(plane, block, tx_size, args, 8);
      }
#else
516
      dist_block(plane, block, tx_size, args);
517
#endif  // CONFIG_VP9_HIGHBITDEPTH
518
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
519
      // compute DC coefficient
520 521
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
522
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
523
      args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
524
      args->dist = args->sse;
525
      if (x->plane[plane].eobs[block]) {
Jingning Han's avatar
Jingning Han committed
526 527 528
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
529 530 531
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
532 533 534
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

535
        args->dist = MAX(0, args->sse - dc_correct);
536
      }
537 538 539
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
540
      args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
541 542
      args->dist = args->sse;
    }
543 544 545
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
546 547 548 549 550 551 552
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
553
    dist_block(plane, block, tx_size, args);
554
#endif  // CONFIG_VP9_HIGHBITDEPTH
555
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
556

557
  rate_block(plane, block, plane_bsize, tx_size, args);
558 559
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
560 561

  // TODO(jingning): temporarily enabled only for luma component
562
  rd = MIN(rd1, rd2);
Yaowu Xu's avatar
Yaowu Xu committed
563
  if (plane == 0)
564
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
Yaowu Xu's avatar
Yaowu Xu committed
565
                                    (rd1 > rd2 && !xd->lossless);
566

567 568 569
  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
570 571 572 573 574 575
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
576 577
}

578
static void txfm_rd_in_plane(MACROBLOCK *x,
579 580 581
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
582 583
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
584
  MACROBLOCKD *const xd = &x->e_mbd;
585
  const struct macroblockd_plane *const pd = &xd->plane[plane];
586 587
  struct rdcost_block_args args;
  vp9_zero(args);
588 589
  args.x = x;
  args.best_rd = ref_best_rd;
590
  args.use_fast_coef_costing = use_fast_coef_casting;
591

592
  if (plane == 0)
593
    xd->mi[0]->mbmi.tx_size = tx_size;
594

595
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
596

597
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
598

599
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
600 601
                                         block_rd_txfm, &args);
  if (args.skip) {
602 603 604 605 606
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
607 608 609
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
610
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
611
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
612 613
}

614 615 616 617 618
// Selects, without any search, the largest luma transform size allowed by
// both the block size and the frame's tx_mode, stores it in mbmi->tx_size,
// and evaluates plane 0 with it.  The outputs have the meaning documented
// for txfm_rd_in_plane().
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}

632
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
633 634 635 636
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
637
                                   int64_t tx_cache[TX_MODES],
638
                                   int64_t ref_best_rd,
639
                                   BLOCK_SIZE bs) {
640
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
641 642
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
643
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
644
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
645 646
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
647 648 649 650
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
651
  int n, m;
652
  int s0, s1;
653 654
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
655
  TX_SIZE best_tx = max_tx_size;
656

657
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
658 659 660
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
661

662
  for (n = max_tx_size; n >= 0;  n--) {
663 664 665
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
666 667
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
668
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
669 670 671 672 673 674
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
675 676
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
677
    } else if (s[n]) {
678 679 680 681
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
682
    }
683

684 685 686
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
687
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
688 689 690
        s[n] == 1))
      break;

691 692 693 694
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
695
  }
696 697
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
698 699


700 701 702
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
703
  *psse       = sse[mbmi->tx_size];
704

705 706 707 708
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
709

710 711 712 713
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
714
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
715
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
716
  } else {
717
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
718
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
719
}
720

721 722 723 724 725
// Computes luma rate / distortion / skip for a block and chooses its
// transform size.
//
// When the speed feature requests USE_LARGESTALL, or the frame is lossless,
// the largest permitted transform size is used and txfm_cache[] is zeroed;
// otherwise the size comes from an RD search that also fills txfm_cache[].
// psse may be NULL, in which case the SSE is computed but discarded.
// bs must equal the block's sb_type.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  // Callers that do not want the SSE get it written to a local instead.
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0]->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
                           txfm_cache, ref_best_rd, bs);
  }
}

742 743
// Returns 1 when the directional intra mode `mode` should be skipped given
// the best intra mode found so far, 0 otherwise.
//
// Each of the four listed directional modes is kept only if the current
// best mode is one of two specific modes:
//   D117_PRED - V_PRED or D135_PRED
//   D63_PRED  - V_PRED or D45_PRED
//   D207_PRED - H_PRED or D45_PRED
//   D153_PRED - H_PRED or D135_PRED
// Every other mode is never skipped by this test.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

763
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
764
                                     PREDICTION_MODE *best_mode,
765
                                     const int *bmode_costs,
766 767
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
768
                                     int64_t *bestdistortion,
769
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
770
  PREDICTION_MODE mode;
771
  MACROBLOCKD *const xd = &x->e_mbd;
772
  int64_t best_rd = rd_thresh;
773

774 775 776
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
777
  const int dst_stride = pd->dst.stride;
778 779 780 781
  const uint8_t *src_init = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, ib,
                                                                src_stride)];
  uint8_t *dst_init = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, ib,
                                                           dst_stride)];
782 783
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
784

Jim Bankoski's avatar
Jim Bankoski committed
785 786
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];