vp9_rdopt.c 152 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
 */

#include <assert.h>
12
#include <math.h>
13

14
15
16
17
18
19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21
22
23
24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25
26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27
28
29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38
39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
43
44
45
46
47
48
// Bit masks indexed by reference frame type. Each *_MODE_MASK has a bit set
// (1 << frame) for INTRA_FRAME and for the two inter reference frames other
// than the one named by the macro.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// Bit for ALTREF_FRAME plus bit 0.
// NOTE(review): bit 0 presumably stands for (1 << INTRA_FRAME) -- confirm
// against the MV_REFERENCE_FRAME definition.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
49

Paul Wilkins's avatar
Paul Wilkins committed
50
51
#define MIN_EARLY_TERM_INDEX    3

52
typedef struct {
53
  PREDICTION_MODE mode;
54
55
56
57
58
59
60
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// A pair of reference frames. Used as the element type of the vp9_ref_order
// table.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
61
62
63
64
65
66
67
68
69
70
71
72
73
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
74
  int use_fast_coef_costing;
75
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
76
77
};

78
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
79
80
81
82
83
84
85
86
87
88
89
90
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
91
92
93
94
95
96
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
118
119
};

120
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
121
122
123
124
125
126
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
127
128
};

129
130
// Converts a raster-order 4x4 block index inside a block of size plane_bsize
// into an element offset for a buffer with the given stride.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  // Split the index into a row and a column of 4x4 units.
  const int row = raster_block >> width_log2;
  const int col = raster_block & ((1 << width_log2) - 1);
  // Each unit covers four samples in both directions.
  const int y = 4 * row;
  const int x = 4 * col;
  return y * stride + x;
}
// Returns a pointer to the 4x4 block with raster index raster_block inside an
// int16_t buffer whose stride equals the pixel width of plane_bsize.
static int16_t *raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int buf_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         buf_stride);
  return base + offset;
}

142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
// For every plane in [min_plane, max_plane), exchanges the buffer pointers
// held in slots m and n of ctx. As a side effect the plane's working pointers
// (coeff, qcoeff, dqcoeff, eobs) are left pointing at the buffers that were
// in slot m before the call (and are in slot n afterwards).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Transform coefficients.
    mb_plane->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = mb_plane->coeff;

    // Quantized coefficients.
    mb_plane->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = mb_plane->qcoeff;

    // Dequantized coefficients.
    mbd_plane->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = mbd_plane->dqcoeff;

    // End-of-block positions.
    mb_plane->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = mb_plane->eobs;
  }
}

167
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
168
                            MACROBLOCK *x, MACROBLOCKD *xd,
169
170
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
Deb Mukherjee's avatar
Deb Mukherjee committed
171
172
173
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
174
175
176
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
177
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
178
  unsigned int sse;
179
  unsigned int var = 0;
180
  unsigned int sum_sse = 0;
181
182
  int64_t total_sse = 0;
  int skip_flag = 1;
183
  const int shift = 6;
184
185
186
187
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
188
189
190
191

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
192
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
193
194
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
195
196
197
198
199
200
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
201
202
203
204
205
206
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

207
    sum_sse = 0;
208
209
210
211
212

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
213
        int block_idx = (idy << 1) + idx;
214
        int low_err_skip = 0;
215
216
217
218
219
220

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

221
        x->skip_txfm[(i << 2) + block_idx] = 0;
222
        if (!x->select_tx_size) {
223
          // Check if all ac coefficients can be quantized to zero.
224
          if (var < ac_thr || var == 0) {
225
            x->skip_txfm[(i << 2) + block_idx] = 2;
226
227

            // Check if dc coefficient can be quantized to zero.
228
            if (sse - var < dc_thr || sse == var) {
229
              x->skip_txfm[(i << 2) + block_idx] = 1;
230
231
232
233

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
234
          }
235
        }
236

237
238
239
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

240
241
242
243
        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
244

245
246
    total_sse += sum_sse;

247
    // Fast approximate the modelling function.
248
    if (cpi->oxcf.speed > 4) {
249
      int64_t rate;
250
      const int64_t square_error = sum_sse;
251
      int quantizer = (pd->dequant[1] >> 3);
252
253
254
255
256
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
257

258
259
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
260
261
262
263
264
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
265
    } else {
266
267
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Deb Mukherjee's avatar
Deb Mukherjee committed
268
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
269
270
271
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
Deb Mukherjee's avatar
Deb Mukherjee committed
272
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
273
274
275
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
276
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
277
                                   pd->dequant[1] >> 3, &rate, &dist);
278
#endif  // CONFIG_VP9_HIGHBITDEPTH
279
      rate_sum += rate;
280
      dist_sum += dist;
281
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
282
283
  }

284
285
  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
286
287
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
288
289
}

290
// Computes the squared error between original and dequantized transform
// coefficients.
//
//   coeff, dqcoeff - arrays of block_size coefficients.
//   ssz            - receives the sum of squared original coefficients.
// Returns the sum of squared differences (coeff[i] - dqcoeff[i])^2.
//
// All products are formed in 64 bits so that large coefficients cannot
// overflow an int before being accumulated.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

305
306

#if CONFIG_VP9_HIGHBITDEPTH
// High bit depth variant of the block error computation.
//
//   coeff, dqcoeff - arrays of block_size coefficients.
//   ssz            - receives the sum of squared original coefficients.
//   bd             - bit depth; both results are scaled down by
//                    2 * (bd - 8) bits with rounding.
// Returns the rounded, scaled sum of squared differences.
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    // Widen before subtracting so the difference itself cannot overflow.
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

330
331
332
333
334
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
335
static const int16_t band_counts[TX_SIZES][8] = {
336
337
338
339
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
340
};
341
static INLINE int cost_coeffs(MACROBLOCK *x,
342
                              int plane, int block,
343
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
344
                              TX_SIZE tx_size,
345
346
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
347
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
348
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
349
350
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
351
  const PLANE_TYPE type = pd->plane_type;
352
  const int16_t *band_count = &band_counts[tx_size][1];
353
  const int eob = p->eobs[block];
354
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
355
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
356
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
357
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
358
  int pt = combine_entropy_contexts(*A, *L);
359
  int c, cost;
360
  // Check for consistency of tx_size with mode info
361
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
362
                              : get_uv_tx_size(mbmi, pd) == tx_size);
363

364
365
  if (eob == 0) {
    // single eob token
366
    cost = token_costs[0][0][pt][EOB_TOKEN];
367
    c = 0;
368
  } else {
369
    int band_left = *band_count++;
370
371

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
372
    int v = qcoeff[0];
373
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
374
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
375
    token_cache[0] = vp9_pt_energy_class[prev_t];
376
    ++token_costs;
377
378
379
380

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
381
      int t;
382

Dmitry Kovalev's avatar
Dmitry Kovalev committed
383
      v = qcoeff[rc];
384
      t = vp9_dct_value_tokens_ptr[v].token;
385
386
387
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
388
        pt = get_coef_context(nb, token_cache, c);
389
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
390
        token_cache[rc] = vp9_pt_energy_class[t];
391
      }
392
      prev_t = t;
393
      if (!--band_left) {
394
395
        band_left = *band_count++;
        ++token_costs;
396
      }
397
    }
398
399

    // eob token
400
    if (band_left) {
401
402
403
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
404
        pt = get_coef_context(nb, token_cache, c);
405
406
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
407
    }
408
409
  }

410
  // is eob first coefficient;
411
  *A = *L = (c > 0);
412

413
414
  return cost;
}
415
416
417
418
419

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
420
421
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
422
#endif  // CONFIG_VP9_HIGHBITDEPTH
423
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
424
425
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
426
427
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
428
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
429
  int shift = tx_size == TX_32X32 ? 0 : 2;
430
431
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
432
#if CONFIG_VP9_HIGHBITDEPTH
433
434
  args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
435
#else
436
437
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
438
#endif  // CONFIG_VP9_HIGHBITDEPTH
439
  args->sse  = this_sse >> shift;
440

hkuang's avatar
hkuang committed
441
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
442
443
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
444
                    (1 << ss_txfrm_size)) >> (shift + 2);
445
446
447
448
449
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
450
451
    args->dist += (p >> 4);
    args->sse  += p;
452
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
453
454
}

455
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
456
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
457
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
458
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
459

460
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
461
                           args->t_left + y_idx, tx_size,
462
463
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
464
465
}

466
467
// Calls dist_block() for one transform block. In high bit depth builds the
// bit depth is xd->bd for high bit depth frame buffers and 8 otherwise.
static void block_rd_txfm_dist(int plane, int block, TX_SIZE tx_size,
                               struct rdcost_block_args *args) {
#if CONFIG_VP9_HIGHBITDEPTH
  const MACROBLOCKD *const xd = &args->x->e_mbd;
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
  dist_block(plane, block, tx_size, args, bd);
#else
  dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
}

// Per-transform-block callback: computes rate, distortion and sse for one
// block, adds them to the running totals in `arg` (a struct
// rdcost_block_args), and sets args->skip once the accumulated RD cost
// exceeds args->best_rd. Does nothing when args->skip is already set.
//
// Intra blocks are encoded via vp9_encode_block_intra(). Inter blocks at the
// largest transform size follow x->skip_txfm[]: 0 runs the full transform,
// 2 transforms the DC coefficient only, any other value skips the transform
// and uses the stored prediction SSE. All other inter blocks run the full
// transform.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi)) {
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
    block_rd_txfm_dist(plane, block, tx_size, args);
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Index of this block's entry in x->skip_txfm[] / x->bsse[].
    const int unit_idx = (plane << 2) + (block >> (tx_size << 1));

    if (x->skip_txfm[unit_idx] == 0) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      block_rd_txfm_dist(plane, block, tx_size, args);
    } else if (x->skip_txfm[unit_idx] == 2) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      // Widen before scaling so the shift cannot overflow a narrower type.
      args->sse  = (int64_t)x->bsse[unit_idx] << 4;
      args->dist = args->sse;
      if (x->plane[plane].eobs[block]) {
        // Energy removed by coding the DC term; computed in 64 bits so the
        // squares cannot overflow an int.
        const int64_t dc = coeff[0];
        const int64_t dc_resid = dc - dqcoeff[0];
        int64_t dc_correct = dc * dc - dc_resid * dc_resid;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        args->dist = MAX(0, args->sse - dc_correct);
      }
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      args->sse  = (int64_t)x->bsse[unit_idx] << 4;
      args->dist = args->sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
    block_rd_txfm_dist(plane, block, tx_size, args);
  }

  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of coding none of them.
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Stop processing further blocks once the budget is exceeded.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

560
static void txfm_rd_in_plane(MACROBLOCK *x,
561
562
563
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
564
565
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
566
  MACROBLOCKD *const xd = &x->e_mbd;
567
  const struct macroblockd_plane *const pd = &xd->plane[plane];
568
569
  struct rdcost_block_args args;
  vp9_zero(args);
570
571
  args.x = x;
  args.best_rd = ref_best_rd;
572
  args.use_fast_coef_costing = use_fast_coef_casting;
573

574
  if (plane == 0)
hkuang's avatar
hkuang committed
575
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
576

577
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
578

579
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
580

581
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
582
583
                                         block_rd_txfm, &args);
  if (args.skip) {
584
585
586
587
588
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
589
590
591
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
592
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
593
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
594
595
}

596
597
598
599
600
// Sets the block's transform size to the smaller of the largest size allowed
// for block size bs and the largest size allowed by the frame's tx_mode,
// then evaluates luma rate/distortion at that size.
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cm->tx_mode];

  mbmi->tx_size = MIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* plane */, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

614
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
615
616
617
618
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
619
                                   int64_t tx_cache[TX_MODES],
620
                                   int64_t ref_best_rd,
621
                                   BLOCK_SIZE bs) {
622
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
623
624
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
625
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
626
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
627
628
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
629
630
631
632
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
633
  int n, m;
634
  int s0, s1;
635
636
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
637
  TX_SIZE best_tx = max_tx_size;
638

639
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
640
641
642
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
643

644
  for (n = max_tx_size; n >= 0;  n--) {
645
646
647
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
648
649
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
650
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
651
652
653
654
655
656
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
657
658
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
659
    } else if (s[n]) {
660
661
662
663
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
664
    }
665

666
667
668
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
669
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
670
671
672
        s[n] == 1))
      break;

673
674
675
676
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
677
  }
678
679
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
680
681


682
683
684
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
685
  *psse       = sse[mbmi->tx_size];
686

687
688
689
690
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
691

692
693
694
695
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
696
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
697
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
698
  } else {
699
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
700
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
701
}
702

703
704
705
706
707
// Computes luma rate/distortion for a block of size bs and selects its
// transform size.
//
// When the speed feature requests USE_LARGESTALL, or the block is lossless,
// the largest permitted transform size is used and txfm_cache is zeroed.
// Otherwise the transform size is chosen by RD search and txfm_cache is
// filled by that search. psse may be null, in which case the sse result is
// discarded.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t unused_sse;
  int64_t *sse_out = psse ? psse : &unused_sse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method != USE_LARGESTALL && !xd->lossless) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out,
                           txfm_cache, ref_best_rd, bs);
    return;
  }

  vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
  choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out, ref_best_rd,
                         bs);
}

724
725
// Returns 1 when the given diagonal intra mode should be skipped because
// the best intra mode found so far is not one of the two modes it is paired
// with below; returns 0 for every other case.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

745
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
746
                                     PREDICTION_MODE *best_mode,
747
                                     const int *bmode_costs,
748
749
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
750
                                     int64_t *bestdistortion,
751
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
752
  PREDICTION_MODE mode;
753
  MACROBLOCKD *const xd = &x->e_mbd;
754
  int64_t best_rd = rd_thresh;
755

756
757
758
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
759
  const int dst_stride = pd->dst.stride;
760
761
762
763
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
764
765
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
766

Jim Bankoski's avatar
Jim Bankoski committed
767
768
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
769
  int idx, idy;
770
  uint8_t best_dst[8 * 8];
771
772
773
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
774

Jingning Han's avatar
Jingning Han committed
775
  assert(ib < 4);
776

777
778
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
779
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
780

781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
816
817
          vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                    dst, dst_stride, xd->bd);
818
819
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
820
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
821
822
823
824
825
826
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
827
828
829
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                   dst, dst_stride,
                                   p->eobs[block], xd->bd);
830
831
832
833
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
834
            vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
835
836
837
838
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
839
840
841
            distortion += vp9_highbd_block_error(
                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                16, &unused, xd->bd) >> 2;
842
843
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
844
845
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst, dst_stride, p->eobs[block], xd->bd);
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        vpx_memcpy(a, tempa, sizeof(tempa));
        vpx_memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          vpx_memcpy(best_dst16 + idy * 8,
                     CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                     num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 best_dst16 + idy * 8,
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

883
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
884
    int64_t this_rd;
885
    int ratey = 0;
886
887
    int64_t distortion = 0;
    int rate = bmode_costs[mode];
888

889
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
890
891
      continue;

892
893
    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
894
895
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
896
897
          continue;
    }
898

899
900
901
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

Jim Bankoski's avatar
Jim Bankoski committed
902
903
    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (