/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
12
#include <math.h>
13

14 15 16 17 18 19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21 22 23 24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25 26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27 28 29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38 39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41 42 43 44 45 46 47 48
// Reference-frame bit masks. Each *_MODE_MASK sets one bit per reference
// frame other than the frame the mask is named after.
#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

// The ALTREF_FRAME bit combined with bit 0.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)

#define MIN_EARLY_TERM_INDEX    3

52
typedef struct {
53
  PREDICTION_MODE mode;
54 55 56 57 58 59 60
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference search order: a reference frame pair only.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
61 62 63 64 65 66 67 68 69 70 71 72 73
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
74
  int use_fast_coef_costing;
75
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
76 77
};

78
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
79 80 81 82 83 84 85 86 87 88 89 90
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
91 92 93 94 95 96
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
118 119
};

120
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
121 122 123 124 125 126
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
127 128
};

129 130
// Converts the raster index of a 4x4 block inside a plane block of size
// plane_bsize into an element offset for a buffer with the given stride.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int col_mask = (1 << width_log2) - 1;
  // Row / column of the 4x4 block, scaled by 4 to get sample coordinates.
  const int sample_row = 4 * (raster_block >> width_log2);
  const int sample_col = 4 * (raster_block & col_mask);

  return sample_row * stride + sample_col;
}
// Returns a pointer to the 4x4 block with raster index raster_block inside
// an int16_t buffer whose stride is the width of plane_bsize in samples
// (4 samples per 4x4 block column).
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int samples_per_row = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         samples_per_row);
  return base + offset;
}

142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
// For planes [min_plane, max_plane): exchanges slots m and n of the
// coefficient buffer sets held in ctx, and leaves the plane's working
// pointers (coeff, qcoeff, dqcoeff, eobs) aimed at the buffers that were in
// slot m before the call (i.e. the ones now stored in slot n).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const enc_plane = &x->plane[plane];
    struct macroblockd_plane *const dec_plane = &x->e_mbd.plane[plane];

    // Transform coefficients.
    enc_plane->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = enc_plane->coeff;

    // Quantized coefficients.
    enc_plane->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = enc_plane->qcoeff;

    // Dequantized coefficients.
    dec_plane->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = dec_plane->dqcoeff;

    // End-of-block positions.
    enc_plane->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = enc_plane->eobs;
  }
}

167
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
168
                            MACROBLOCK *x, MACROBLOCKD *xd,
169 170
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
Deb Mukherjee's avatar
Deb Mukherjee committed
171 172 173
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
174 175 176
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
177
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
178
  unsigned int sse;
179
  unsigned int var = 0;
180
  unsigned int sum_sse = 0;
181 182
  int64_t total_sse = 0;
  int skip_flag = 1;
183
  const int shift = 6;
184 185 186 187
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
188 189 190 191

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
192
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
193 194
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
195 196 197 198 199 200
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
201 202 203 204 205 206
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

207
    sum_sse = 0;
208 209 210 211 212

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
213
        int block_idx = (idy << 1) + idx;
214
        int low_err_skip = 0;
215 216 217 218 219 220

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

221
        x->skip_txfm[(i << 2) + block_idx] = 0;
222
        if (!x->select_tx_size) {
223
          // Check if all ac coefficients can be quantized to zero.
224
          if (var < ac_thr || var == 0) {
225
            x->skip_txfm[(i << 2) + block_idx] = 2;
226 227

            // Check if dc coefficient can be quantized to zero.
228
            if (sse - var < dc_thr || sse == var) {
229
              x->skip_txfm[(i << 2) + block_idx] = 1;
230 231 232 233

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
234
          }
235
        }
236

237 238 239
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

240 241 242 243
        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
244

245 246
    total_sse += sum_sse;

247
    // Fast approximate the modelling function.
248
    if (cpi->oxcf.speed > 4) {
249
      int64_t rate;
250
      const int64_t square_error = sum_sse;
251
      int quantizer = (pd->dequant[1] >> 3);
252 253 254 255 256
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
257

258 259
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
260 261 262 263 264
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
265
    } else {
266 267
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Deb Mukherjee's avatar
Deb Mukherjee committed
268
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
269 270 271
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
Deb Mukherjee's avatar
Deb Mukherjee committed
272
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
273 274 275
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
276
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
277
                                   pd->dequant[1] >> 3, &rate, &dist);
278
#endif  // CONFIG_VP9_HIGHBITDEPTH
279
      rate_sum += rate;
280
      dist_sum += dist;
281
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
282 283
  }

284 285
  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
286 287
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
288 289
}

290
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size entries, and stores the sum of squared coeff[] values in *ssz.
//
// All products are formed in 64-bit arithmetic: squaring the difference of
// two coefficients in plain int can exceed INT_MAX, which is undefined
// behaviour for signed integers.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

305 306

#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth variant of vp9_block_error_c(): returns the sum of squared
// differences between coeff[] and dqcoeff[] and stores the sum of squared
// coeff[] values in *ssz. Both sums are rounded and right-shifted by
// 2 * (bd - 8) before being returned.
//
// Coefficients are widened to 64 bits before the subtraction so that the
// difference itself cannot overflow the narrower coefficient type.
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - (int64_t)dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

330 331 332 333 334
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
335
static const int16_t band_counts[TX_SIZES][8] = {
336 337 338 339
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
340
};
341
static INLINE int cost_coeffs(MACROBLOCK *x,
342
                              int plane, int block,
343
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
344
                              TX_SIZE tx_size,
345 346
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
347
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
348
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
349 350
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
351
  const PLANE_TYPE type = pd->plane_type;
352
  const int16_t *band_count = &band_counts[tx_size][1];
353
  const int eob = p->eobs[block];
354
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
355
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
356
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
357
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
358
  int pt = combine_entropy_contexts(*A, *L);
359
  int c, cost;
360
  // Check for consistency of tx_size with mode info
361
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
362
                              : get_uv_tx_size(mbmi, pd) == tx_size);
363

364 365
  if (eob == 0) {
    // single eob token
366
    cost = token_costs[0][0][pt][EOB_TOKEN];
367
    c = 0;
368
  } else {
369
    int band_left = *band_count++;
370 371

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
372
    int v = qcoeff[0];
373
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
374
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
375
    token_cache[0] = vp9_pt_energy_class[prev_t];
376
    ++token_costs;
377 378 379 380

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
381
      int t;
382

Dmitry Kovalev's avatar
Dmitry Kovalev committed
383
      v = qcoeff[rc];
384
      t = vp9_dct_value_tokens_ptr[v].token;
385 386 387
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
388
        pt = get_coef_context(nb, token_cache, c);
389
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
390
        token_cache[rc] = vp9_pt_energy_class[t];
391
      }
392
      prev_t = t;
393
      if (!--band_left) {
394 395
        band_left = *band_count++;
        ++token_costs;
396
      }
397
    }
398 399

    // eob token
400
    if (band_left) {
401 402 403
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
404
        pt = get_coef_context(nb, token_cache, c);
405 406
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
407
    }
408 409
  }

410
  // is eob first coefficient;
411
  *A = *L = (c > 0);
412

413 414
  return cost;
}
415 416 417 418 419

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
420 421
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
422
#endif  // CONFIG_VP9_HIGHBITDEPTH
423
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
424 425
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
426 427
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
428
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
429
  int shift = tx_size == TX_32X32 ? 0 : 2;
430 431
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
432
#if CONFIG_VP9_HIGHBITDEPTH
433 434
  args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
435
#else
436 437
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
438
#endif  // CONFIG_VP9_HIGHBITDEPTH
439
  args->sse  = this_sse >> shift;
440

hkuang's avatar
hkuang committed
441
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
442 443
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
444
                    (1 << ss_txfrm_size)) >> (shift + 2);
445 446 447 448 449
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
450 451
    args->dist += (p >> 4);
    args->sse  += p;
452
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
453 454
}

455
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
456
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
457
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
458
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
459

460
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
461
                           args->t_left + y_idx, tx_size,
462 463
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
464 465
}

466 467
// Calls dist_block() for one transform block, supplying the bit depth when
// the encoder is built with high-bitdepth support (xd->bd for high-bitdepth
// frame buffers, 8 otherwise).
static void block_rd_txfm_dist(int plane, int block, TX_SIZE tx_size,
                               struct rdcost_block_args *args) {
#if CONFIG_VP9_HIGHBITDEPTH
  const MACROBLOCKD *const xd = &args->x->e_mbd;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    dist_block(plane, block, tx_size, args, xd->bd);
  } else {
    dist_block(plane, block, tx_size, args, 8);
  }
#else
  dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
}

// Per-transform-block callback: computes rate, distortion and SSE for one
// transform block and accumulates them into the rdcost_block_args passed
// through arg.
//
// Non-inter blocks are encoded via vp9_encode_block_intra(). Inter blocks
// at the maximum transform size follow x->skip_txfm[]: 0 runs the full
// transform and quantization, 2 computes only the DC coefficient, and any
// other value skips the transform altogether. All remaining inter blocks
// run the full transform and quantization.
//
// Once the accumulated RD cost exceeds args->best_rd, args->skip is set and
// every later call returns immediately.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi)) {
    struct encode_b_args intra_args = {x, NULL, &mbmi->skip};
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &intra_args);
    block_rd_txfm_dist(plane, block, tx_size, args);
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Index shared by x->skip_txfm[] and x->bsse[] for this block.
    const int unit_idx = (plane << 2) + (block >> (tx_size << 1));

    if (x->skip_txfm[unit_idx] == 0) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      block_rd_txfm_dist(plane, block, tx_size, args);
    } else if (x->skip_txfm[unit_idx] == 2) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      args->sse  = x->bsse[unit_idx] << 4;
      args->dist = args->sse;
      if (x->plane[plane].eobs[block]) {
        // Energy removed by coding the DC coefficient. Formed in 64 bits so
        // that squaring the coefficient cannot overflow.
        const int64_t dc = coeff[0];
        const int64_t dc_err = dc - dqcoeff[0];
        int64_t dc_correct = dc * dc - dc_err * dc_err;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        args->dist = MAX(0, args->sse - dc_correct);
      }
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      args->sse  = x->bsse[unit_idx] << 4;
      args->dist = args->sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
    block_rd_txfm_dist(plane, block, tx_size, args);
  }

  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of dropping them.
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Over budget: tell the remaining blocks to bail out.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

561
static void txfm_rd_in_plane(MACROBLOCK *x,
562 563 564
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
565 566
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
567
  MACROBLOCKD *const xd = &x->e_mbd;
568
  const struct macroblockd_plane *const pd = &xd->plane[plane];
569 570
  struct rdcost_block_args args;
  vp9_zero(args);
571 572
  args.x = x;
  args.best_rd = ref_best_rd;
573
  args.use_fast_coef_costing = use_fast_coef_casting;
574

575
  if (plane == 0)
hkuang's avatar
hkuang committed
576
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
577

578
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
579

580
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
581

582
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
583 584
                                         block_rd_txfm, &args);
  if (args.skip) {
585 586 587 588 589
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
590 591 592
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
593
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
594
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
595 596
}

597 598 599 600 601
// Picks the largest transform size allowed by both the block size and the
// frame's tx_mode, stores it in the mode info, and evaluates plane 0 with it
// through txfm_rd_in_plane().
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  MB_MODE_INFO *const mode_info = &x->e_mbd.mi[0].src_mi->mbmi;
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE frame_limit =
      tx_mode_to_biggest_tx_size[cpi->common.tx_mode];

  mode_info->tx_size = MIN(block_limit, frame_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* plane */, bs, mode_info->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

615
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
616 617 618 619
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
620
                                   int64_t tx_cache[TX_MODES],
621
                                   int64_t ref_best_rd,
622
                                   BLOCK_SIZE bs) {
623
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
624 625
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
626
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
627
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
628 629
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
630 631 632 633
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
634
  int n, m;
635
  int s0, s1;
636 637
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
638
  TX_SIZE best_tx = max_tx_size;
639

640
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
641 642 643
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
644

645
  for (n = max_tx_size; n >= 0;  n--) {
646 647 648
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
649 650
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
651
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
652 653 654 655 656 657
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
658 659
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
660
    } else if (s[n]) {
661 662 663 664
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
665
    }
666

667 668 669
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
670
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
671 672 673
        s[n] == 1))
      break;

674 675 676 677
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
678
  }
679 680
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
681 682


683 684 685
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
686
  *psse       = sse[mbmi->tx_size];
687

688 689 690 691
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
692

693 694 695 696
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
697
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
698
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
699
  } else {
700
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
701
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
702
}
703

704 705 706 707 708
// Computes rate, distortion, skip flag and (optionally) SSE for the luma
// plane of a block, choosing the transform size on the way.
//
// With the USE_LARGESTALL search method, or in lossless mode, the largest
// allowed transform size is used and txfm_cache is zeroed; otherwise the
// size is chosen by an RD search that also fills txfm_cache. psse may be
// NULL when the caller does not need the SSE.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t unused_sse;
  int64_t *sse_out = &unused_sse;

  if (psse)
    sse_out = psse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method != USE_LARGESTALL && !xd->lossless) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out,
                           txfm_cache, ref_best_rd, bs);
    return;
  }

  vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
  choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out, ref_best_rd,
                         bs);
}

725 726
// Returns 1 when the directional intra mode `mode` should be skipped given
// the best intra mode found so far, 0 otherwise. Each of the four modes
// handled here is only kept when the best mode is one of two specific
// modes; every other mode is never skipped.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

746
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
747
                                     PREDICTION_MODE *best_mode,
748
                                     const int *bmode_costs,
749 750
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
751
                                     int64_t *bestdistortion,
752
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
753
  PREDICTION_MODE mode;
754
  MACROBLOCKD *const xd = &x->e_mbd;
755
  int64_t best_rd = rd_thresh;
756

757 758 759
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
760
  const int dst_stride = pd->dst.stride;
761 762 763 764
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
765 766
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
767

Jim Bankoski's avatar
Jim Bankoski committed
768 769
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
770
  int idx, idy;
771
  uint8_t best_dst[8 * 8];
772 773 774
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
775

Jingning Han's avatar
Jingning Han committed
776
  assert(ib < 4);
777

778 779
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
780
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
781

782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
817 818
          vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                    dst, dst_stride, xd->bd);
819 820
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
821
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
822 823 824 825 826 827
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
828 829 830
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                   dst, dst_stride,
                                   p->eobs[block], xd->bd);
831 832 833 834
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
835
            vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
836 837 838 839
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
840 841 842
            distortion += vp9_highbd_block_error(
                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                16, &unused, xd->bd) >> 2;
843 844
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
845 846
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst, dst_stride, p->eobs[block], xd->bd);
847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864