/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
12
#include <math.h>
13

14 15 16 17 18 19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21 22 23 24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25 26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27 28 29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38 39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41 42
// NOTE(review): presumably the upper bound and the per-step increment of an
// adaptive RD threshold factor; the users are outside this excerpt.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1

// Reference-frame bit masks. Bit k stands for the MV_REFERENCE_FRAME whose
// value is k. Each <X>_MODE_MASK sets the bit of every reference frame
// (including INTRA_FRAME) except <X> itself.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// ALTREF_FRAME bit plus bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME -- confirm
// against the MV_REFERENCE_FRAME enum.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)

// NOTE(review): presumably the lowest mode index at which an early
// termination test may fire; the user is outside this excerpt.
#define MIN_EARLY_TERM_INDEX    3

55
typedef struct {
56
  PREDICTION_MODE mode;
57 58 59 60 61 62 63
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
64 65 66 67 68 69 70 71 72 73 74 75 76
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
77
  int use_fast_coef_costing;
78
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
79 80
};

81
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
82 83 84 85 86 87 88 89 90 91 92 93
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
94 95 96 97 98 99
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
121 122
};

123
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
124 125 126 127 128 129
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
130 131
};

132 133 134 135 136 137 138 139 140
// Converts a raster-order 4x4 block index inside a block of size plane_bsize
// into a sample offset for a buffer with the given stride.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int log2_blocks_wide = b_width_log2(plane_bsize);
  const int col_mask = (1 << log2_blocks_wide) - 1;
  // Row and column of the 4x4 block, in units of 4x4 blocks.
  const int row4 = raster_block >> log2_blocks_wide;
  const int col4 = raster_block & col_mask;
  return (4 * row4) * stride + (4 * col4);
}
// Returns the address of a raster-order 4x4 block inside a densely packed
// int16_t buffer whose row stride equals the block width in samples
// (4 * number of 4x4 blocks across plane_bsize).
static int16_t *raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         row_stride);
  return base + offset;
}

145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
// For every plane in [min_plane, max_plane): exchanges the coefficient
// buffer sets stored in slots m and n of ctx, and leaves the plane's working
// pointers (coeff, qcoeff, dqcoeff, eobs) pointing at the set that was in
// slot m before the exchange (i.e. the one that ends up in slot n).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const enc = &x->plane[plane];
    struct macroblockd_plane *const dec = &x->e_mbd.plane[plane];

    // The plane pointer doubles as the temporary of each three-way swap.
    enc->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = enc->coeff;

    enc->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = enc->qcoeff;

    dec->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = dec->dqcoeff;

    enc->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = enc->eobs;
  }
}

170
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
171 172 173 174 175
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
176 177 178
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
179
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
180
  unsigned int sse;
181
  unsigned int var = 0;
182
  unsigned int sum_sse = 0;
183
  const int shift = 8;
184 185 186 187
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
188 189 190 191

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
192
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
193 194 195 196 197 198 199 200
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

201
    sum_sse = 0;
202 203 204 205 206

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
        int block_idx = (idy << 1) + idx;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        if (!x->select_tx_size) {
          if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift)
            x->skip_txfm[(i << 2) + block_idx] = 1;
          else if (var < p->quant_thred[1] >> shift)
            x->skip_txfm[(i << 2) + block_idx] = 2;
          else
            x->skip_txfm[(i << 2) + block_idx] = 0;
        }
222 223 224 225 226

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
227

228
    // Fast approximate the modelling function.
229
    if (cpi->oxcf.speed > 4) {
230
      int64_t rate;
231 232
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);
233 234 235 236 237
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
238

239 240
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
241 242 243 244 245
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
246
    } else {
247 248 249 250 251 252 253 254 255 256
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
        vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
257
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
258
                                   pd->dequant[1] >> 3, &rate, &dist);
259
#endif  // CONFIG_VP9_HIGHBITDEPTH
260
      rate_sum += rate;
261
      dist_sum += dist;
262
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
263 264
  }

265 266
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
267 268
}

269
// Computes the squared error between transform coefficients and their
// dequantized reconstruction.
//
// coeff, dqcoeff: arrays of block_size coefficients.
// ssz:            receives the sum of squares of coeff[] (the error that
//                 would result from an all-zero reconstruction).
// Returns the sum of (coeff[i] - dqcoeff[i])^2.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    // Widen before multiplying so the squares cannot overflow int when
    // tran_low_t is a 32-bit type.
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308

#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth variant of the block error: sums the squared coefficient
// error and the squared coefficients, then scales both down by
// 2 * (bd - 8) bits with rounding.
//
// ssz: receives the scaled sum of squares of coeff[].
// Returns the scaled sum of (coeff[i] - dqcoeff[i])^2.
int64_t vp9_high_block_error_c(const tran_low_t *coeff,
                               const tran_low_t *dqcoeff,
                               intptr_t block_size,
                               int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (shift > 0) ? (1 << (shift - 1)) : 0;
  int64_t err_acc = 0;
  int64_t sq_acc = 0;
  int k;

  for (k = 0; k < block_size; ++k) {
    const int64_t value = coeff[k];
    const int64_t delta = coeff[k] - dqcoeff[k];
    err_acc += delta * delta;
    sq_acc += value * value;
  }
  assert(err_acc >= 0 && sq_acc >= 0);

  *ssz = (sq_acc + rounding) >> shift;
  return (err_acc + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

309 310 311 312 313
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
314
static const int16_t band_counts[TX_SIZES][8] = {
315 316 317 318
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
319
};
320
static INLINE int cost_coeffs(MACROBLOCK *x,
321
                              int plane, int block,
322
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
323
                              TX_SIZE tx_size,
324 325
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
326
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
327
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
328 329
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
330
  const PLANE_TYPE type = pd->plane_type;
331
  const int16_t *band_count = &band_counts[tx_size][1];
332
  const int eob = p->eobs[block];
333
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
334
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
335
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
336
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
337
  int pt = combine_entropy_contexts(*A, *L);
338
  int c, cost;
339
  // Check for consistency of tx_size with mode info
340
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
341
                              : get_uv_tx_size(mbmi, pd) == tx_size);
342

343 344
  if (eob == 0) {
    // single eob token
345
    cost = token_costs[0][0][pt][EOB_TOKEN];
346
    c = 0;
347
  } else {
348
    int band_left = *band_count++;
349 350

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
351
    int v = qcoeff[0];
352
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
353
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
354
    token_cache[0] = vp9_pt_energy_class[prev_t];
355
    ++token_costs;
356 357 358 359

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
360
      int t;
361

Dmitry Kovalev's avatar
Dmitry Kovalev committed
362
      v = qcoeff[rc];
363
      t = vp9_dct_value_tokens_ptr[v].token;
364 365 366
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
367
        pt = get_coef_context(nb, token_cache, c);
368
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
369
        token_cache[rc] = vp9_pt_energy_class[t];
370
      }
371
      prev_t = t;
372
      if (!--band_left) {
373 374
        band_left = *band_count++;
        ++token_costs;
375
      }
376
    }
377 378

    // eob token
379
    if (band_left) {
380 381 382
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
383
        pt = get_coef_context(nb, token_cache, c);
384 385
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
386
    }
387 388
  }

389
  // is eob first coefficient;
390
  *A = *L = (c > 0);
391

392 393
  return cost;
}
394 395 396 397 398

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
399 400
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
401
#endif  // CONFIG_VP9_HIGHBITDEPTH
402
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
403 404
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
405 406
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
407
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
408
  int shift = tx_size == TX_32X32 ? 0 : 2;
409 410
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
411 412 413 414
#if CONFIG_VP9_HIGHBITDEPTH
  args->dist = vp9_high_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                    &this_sse, bd) >> shift;
#else
415 416
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
417
#endif  // CONFIG_VP9_HIGHBITDEPTH
418
  args->sse  = this_sse >> shift;
419

hkuang's avatar
hkuang committed
420
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
421 422
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
423
                    (1 << ss_txfrm_size)) >> (shift + 2);
424 425 426 427 428
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
429 430
    args->dist += (p >> 4);
    args->sse  += p;
431
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
432 433
}

434
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
435
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
436
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
437
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
438

439
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
440
                           args->t_left + y_idx, tx_size,
441 442
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
443 444
}

445 446
// Per-transform-block callback (arg is a struct rdcost_block_args *):
// obtains distortion and rate of one transform block and adds them to the
// running totals in args. Does nothing once args->skip is set, and sets
// args->skip as soon as the accumulated RD cost exceeds args->best_rd.
//
// Intra blocks are predicted and encoded here. Inter blocks at the largest
// transform size of the plane block follow x->skip_txfm[]:
//   0: full forward transform and quantization,
//   2: DC-only transform and quantization,
//   otherwise: no transform; the block is treated as all-zero.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi)) {
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
    dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Index of this unit in x->skip_txfm[] / x->bsse[].
    const int unit = (plane << 2) + (block >> (tx_size << 1));

    if (x->skip_txfm[unit] == 0) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dist_block(plane, block, tx_size, args, xd->bd);
      } else {
        dist_block(plane, block, tx_size, args, 8);
      }
#else
      dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    } else if (x->skip_txfm[unit] == 2) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      args->sse  = x->bsse[unit] << 4;
      args->dist = args->sse;
      // Coding the DC coefficient lowers the distortion by the part of the
      // DC energy that the quantized value reproduces. The correction only
      // applies when the DC coefficient is non-zero (eob != 0); with
      // eob == 0 the dequantized value is zero and the correction vanishes.
      if (x->plane[plane].eobs[block]) {
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd = (int64_t)coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd * resd;
        // Same scaling as dist_block(): no down-shift for TX_32X32.
        if (tx_size != TX_32X32)
          dc_correct >>= 2;
        // NOTE(review): no extra bit-depth scaling is applied here for
        // high-bitdepth frames -- confirm whether one is needed.
        args->dist = args->sse - dc_correct;
        if (args->dist < 0)
          args->dist = 0;
      }
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      args->sse  = x->bsse[unit] << 4;
      args->dist = args->sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
    dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of zeroing the block.
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Budget exceeded: stop evaluating the remaining transform blocks.
  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
}

531
static void txfm_rd_in_plane(MACROBLOCK *x,
532 533 534
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
535 536
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
537
  MACROBLOCKD *const xd = &x->e_mbd;
538
  const struct macroblockd_plane *const pd = &xd->plane[plane];
539 540
  struct rdcost_block_args args;
  vp9_zero(args);
541 542
  args.x = x;
  args.best_rd = ref_best_rd;
543
  args.use_fast_coef_costing = use_fast_coef_casting;
544

545
  if (plane == 0)
hkuang's avatar
hkuang committed
546
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
547

548
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
549

550
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
551

552
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
553 554
                                         block_rd_txfm, &args);
  if (args.skip) {
555 556 557 558 559
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
560 561 562
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
563
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
564
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
565 566
}

567 568 569 570 571
// Sets the luma transform size of the current block to the largest size
// allowed by both the block size and the frame's tx_mode, and evaluates
// rate / distortion / skip / sse of the luma plane at that size.
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit =
      tx_mode_to_biggest_tx_size[cpi->common.tx_mode];

  mbmi->tx_size = MIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* luma plane */, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

585
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
586 587 588 589
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
590
                                   int64_t tx_cache[TX_MODES],
591
                                   int64_t ref_best_rd,
592
                                   BLOCK_SIZE bs) {
593
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
594 595
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
596
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
597
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
598 599
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
600 601 602 603
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
604
  int n, m;
605
  int s0, s1;
606 607
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
608
  TX_SIZE best_tx = max_tx_size;
609

610
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
611 612 613
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
614

615
  for (n = max_tx_size; n >= 0;  n--) {
616 617 618
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
619 620
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
621
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
622 623 624 625 626 627
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
628 629
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
630
    } else if (s[n]) {
631 632 633 634
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
635
    }
636

637 638 639
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
640
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
641 642 643
        s[n] == 1))
      break;

644 645 646 647
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
648
  }
649 650
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
651 652


653 654 655
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
656
  *psse       = sse[mbmi->tx_size];
657

658 659 660 661
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
662

663 664 665 666
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
667
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
668
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
669
  } else {
670
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
671
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
672
}
673

674 675 676 677 678
// Luma rate-distortion evaluation of the current block.
//
// Uses the largest allowed transform size when the speed feature
// tx_size_search_method is USE_LARGESTALL or the block is lossless (in
// which case txfm_cache is zeroed); otherwise searches the transform sizes
// by RD cost. psse may be NULL if the caller does not need the SSE.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t unused_sse;
  int64_t *const sse_out = psse ? psse : &unused_sse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method != USE_LARGESTALL && !xd->lossless) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out,
                           txfm_cache, ref_best_rd, bs);
    return;
  }

  vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
  choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out,
                         ref_best_rd, bs);
}

695 696
// Returns 1 when the oblique intra mode `mode` should be skipped because the
// best intra mode so far is not one of the two modes associated with it
// below; returns 0 for every other mode.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

716
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
717
                                     PREDICTION_MODE *best_mode,
718
                                     const int *bmode_costs,
719 720
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
721
                                     int64_t *bestdistortion,
722
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
723
  PREDICTION_MODE mode;
724
  MACROBLOCKD *const xd = &x->e_mbd;
725
  int64_t best_rd = rd_thresh;
726

727 728 729
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
730
  const int dst_stride = pd->dst.stride;
731 732 733 734
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
735 736
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
737

Jim Bankoski's avatar
Jim Bankoski committed
738 739
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
740
  int idx, idy;
741
  uint8_t best_dst[8 * 8];
742 743 744
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
745

Jingning Han's avatar
Jingning Han committed
746
  assert(ib < 4);
747

748 749
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
750
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
751

752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
          vp9_high_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                  dst, dst_stride, xd->bd);
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
            vp9_high_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_high_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                 dst, dst_stride,
                                 p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
            vp9_high_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            distortion += vp9_high_block_error(coeff,
                                               BLOCK_OFFSET(pd->dqcoeff, block),
                                               16, &unused, xd->bd) >> 2;
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_high_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                dst, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        vpx_memcpy(a, tempa, sizeof(tempa));
        vpx_memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          vpx_memcpy(best_dst16 + idy * 8,
                     CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                     num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 best_dst16 + idy * 8,
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

854
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {