vp9_rdopt.c 136 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
 */

#include <assert.h>
12
#include <math.h>
13

14
15
16
17
18
19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21
22
23
24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25
26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27
28
29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38
39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
// NOTE(review): RD threshold adaptation constants; their use is outside the
// visible part of this file -- presumably an upper bound on, and the step
// for, a per-mode threshold factor. Confirm against the users.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1

// NOTE(review): 32-bit masks, one per reference frame. They look like
// bitmasks over mode indices (one bit per vp9_mode_order[] entry), but the
// code that consumes them is not visible here -- confirm the bit meaning
// before editing vp9_mode_order[] or these values.
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0

// NOTE(review): presumably the first mode index at which early termination
// of the mode search is allowed -- confirm against the user.
#define MIN_EARLY_TERM_INDEX    3

50
// One entry of the mode table (see vp9_mode_order): a prediction mode
// together with the reference frame(s) it uses. In the table initializers
// the second reference is NONE for single-reference and intra entries.
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference table (see vp9_ref_order): a pair of reference
// frames. In the table initializers the second entry is NONE when only one
// reference is used.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
59
60
61
62
63
64
65
66
67
68
69
70
71
// State shared by the per-transform-block callback block_rd_txfm() while
// txfm_rd_in_plane() walks all transform blocks of one plane.
struct rdcost_block_args {
  MACROBLOCK *x;                // macroblock being evaluated
  ENTROPY_CONTEXT t_above[16];  // working copy of the above entropy contexts
  ENTROPY_CONTEXT t_left[16];   // working copy of the left entropy contexts
  int rate;                     // rate of the most recent block (rate_block)
  int64_t dist;                 // distortion of the most recent block
  int64_t sse;                  // sse of the most recent block (dist_block)
  int this_rate;                // running sum of rate over the plane
  int64_t this_dist;            // running sum of dist over the plane
  int64_t this_sse;             // running sum of sse over the plane
  int64_t this_rd;              // running sum of per-block RD cost
  int64_t best_rd;              // budget; exceeding it sets skip
  int skip;                     // set once this_rd > best_rd; later blocks
                                // are then not evaluated
  int use_fast_coef_costing;    // forwarded to cost_coeffs()
  const scan_order *so;         // scan order used for coefficient costing
};

76
// Table of (prediction mode, reference frame pair) combinations, with
// MAX_MODES entries. A second reference of NONE marks a single-reference or
// intra entry.
// NOTE(review): presumably the mode search visits the entries in this order
// and the *_MODE_MASK bit positions index this table -- the consumers are
// not visible here, so confirm before reordering.
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},
  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};

116
// Table of reference frame combinations, with MAX_REFS entries: the three
// single references, the two compound pairs (each with ALTREF_FRAME as the
// second reference), and finally intra.
// NOTE(review): the consumer is not visible here; presumably a search loop
// walks the entries in this order -- confirm before reordering.
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};

125
126
127
128
129
130
131
132
133
// Converts a raster-order 4x4 block index inside a plane block of size
// plane_bsize into an element offset from the top-left corner of a buffer
// with the given stride.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2(plane_bsize);
  const int col = raster_block & ((1 << width_log2) - 1);
  const int row = raster_block >> width_log2;

  // Each block index step is 4 samples in either direction.
  return (4 * row) * stride + (4 * col);
}
// Returns a pointer to the 4x4 block raster_block inside an int16_t buffer
// whose row stride is the pixel width of plane_bsize (4 samples per 4x4
// block column).
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         row_stride);
  return &base[offset];
}

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// For every plane in [min_plane, max_plane), exchanges the coefficient
// buffer pointers held in slots m and n of ctx. As a side effect the plane's
// working pointers (coeff, qcoeff, dqcoeff, eobs) are left aimed at the
// buffers that were in slot m before the call, which end up in slot n.
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];

    // The plane's working pointer doubles as the temporary for each swap.
    p->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = p->coeff;

    p->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = p->qcoeff;

    pd->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = pd->dqcoeff;

    p->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = p->eobs;
  }
}

163
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
164
165
166
167
168
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
169
170
171
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
172
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
173
  unsigned int sse;
Deb Mukherjee's avatar
Deb Mukherjee committed
174
175
176
177

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
178
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
179

Deb Mukherjee's avatar
Deb Mukherjee committed
180
    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
181
                              pd->dst.buf, pd->dst.stride, &sse);
182

183
184
    if (i == 0)
      x->pred_sse[ref] = sse;
185
186

    // Fast approximate the modelling function.
187
    if (cpi->oxcf.speed > 4) {
188
      int64_t rate;
189
190
191
192
      int64_t dist;
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);

193
194
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
195
196
197
198
199
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
200
201
202
    } else {
      int rate;
      int64_t dist;
203
204
      vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> 3, &rate, &dist);
205
      rate_sum += rate;
206
      dist_sum += dist;
207
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
208
209
  }

210
211
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
212
213
}

214
// Computes the squared error between original and dequantized coefficients.
//
//   coeff      - original transform coefficients
//   dqcoeff    - dequantized coefficients
//   block_size - number of coefficients to process
//   ssz        - out: sum of squares of the original coefficients
//
// Returns the sum over the block of (coeff[i] - dqcoeff[i])^2.
//
// All arithmetic is done in 64 bits: the difference of two int16_t values
// can reach +/-65535, whose square does not fit in a 32-bit int, so squaring
// it as an int would be signed overflow (undefined behavior).
int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

229
230
231
232
233
/* Number of coefficients in each coefficient band, per transform size; the
 * counts in each row add up to the number of coefficients in the transform
 * (16, 64, 256, 1024). cost_coeffs() starts reading at index 1 because it
 * costs the DC coefficient separately.
 *
 * The trailing '0' is a terminator used inside cost_coeffs() to decide
 * whether to include the cost of a trailing EOB node or not (i.e. it can be
 * skipped if the last coefficient in the transform block, e.g. the 16th
 * coefficient in a 4x4 block or the 64th coefficient in an 8x8 block, was
 * non-zero). */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
240
// Returns the token cost of the quantized coefficients of one transform
// block.
//
//   x        - macroblock; supplies qcoeff, eobs and the token cost tables
//   plane    - plane index
//   block    - block index within the plane (used with BLOCK_OFFSET and eobs)
//   A, L     - above / left entropy context for this block; read to form the
//              initial context and both overwritten with 1 if the block has
//              at least one coded coefficient (eob > 0), else 0
//   tx_size  - transform size; selects the band layout and the cost table
//   scan     - scan order: maps scan position to coefficient position
//   nb       - neighbor table passed to get_coef_context()
//   use_fast_coef_costing - when non-zero, the context of each AC token and
//              of the EOB token is approximated by whether the previous
//              token was zero, instead of calling get_coef_context()
static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Start at index 1: the DC coefficient is costed separately below.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  // Points at the cost table of the current band; advanced with
  // ++token_costs each time a band is exhausted.
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
  // Energy class of each token costed so far, indexed by coefficient
  // position; consumed by get_coef_context().
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      // Band exhausted: load the next band's length and move to its costs.
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    // band_left is 0 only when the terminator entry of band_counts was just
    // loaded, i.e. every coefficient of the transform was coded; in that
    // case no EOB token is costed.
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // Record in both contexts whether this block coded any coefficient
  // (c > 0 exactly when eob > 0).
  *A = *L = (c > 0);

  return cost;
}
Alex Converse's avatar
Alex Converse committed
314
315
// Fills args->dist and args->sse for one transform block from the
// difference between its coefficients and dequantized coefficients.
// Both values are shifted right by 2, except for 32x32 transforms, which
// are not shifted.
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ss_txfrm_size = tx_size << 1;
  const int shift = (tx_size == TX_32X32) ? 0 : 2;
  int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int64_t block_sse;
  // 16 << ss_txfrm_size is the number of coefficients in the transform.
  const int64_t block_err = vp9_block_error(coeff, dqcoeff,
                                            16 << ss_txfrm_size, &block_sse);

  args->dist = block_err >> shift;
  args->sse  = block_sse >> shift;

  if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
    // TODO(jingning): tune the model to better capture the distortion.
    // Intra blocks evaluated under skip_encode get an extra term derived
    // from the AC dequantizer.
    const int64_t quant_term = (pd->dequant[1] * pd->dequant[1] *
                                   (1 << ss_txfrm_size)) >> (shift + 2);
    args->dist += (quant_term >> 4);
    args->sse  += quant_term;
  }
}

338
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
339
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
340
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
341
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
342

343
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
344
                           args->t_left + y_idx, tx_size,
345
346
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
347
348
}

349
350
// Per-transform-block callback (arg is a struct rdcost_block_args *).
// Transforms and quantizes the block, measures its rate and distortion, and
// adds them to the running totals in args. Once the accumulated RD cost
// exceeds args->best_rd, args->skip is set and later calls return at once.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd_coded, rd_zeroed;

  if (args->skip)
    return;

  if (is_inter_block(mbmi))
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
  else
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);

  dist_block(plane, block, tx_size, args);
  rate_block(plane, block, plane_bsize, tx_size, args);

  // Cost of coding the coefficients versus zero rate with the sse as
  // distortion.
  rd_coded  = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd_zeroed = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd_coded > rd_zeroed && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd   += MIN(rd_coded, rd_zeroed);

  // Over budget: tell the remaining blocks to bail out.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

387
static void txfm_rd_in_plane(MACROBLOCK *x,
388
389
390
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
391
392
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
393
  MACROBLOCKD *const xd = &x->e_mbd;
394
  const struct macroblockd_plane *const pd = &xd->plane[plane];
395
396
  struct rdcost_block_args args;
  vp9_zero(args);
397
398
  args.x = x;
  args.best_rd = ref_best_rd;
399
  args.use_fast_coef_costing = use_fast_coef_casting;
400

401
  if (plane == 0)
402
    xd->mi[0]->mbmi.tx_size = tx_size;
403

404
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
405

406
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
407

408
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
409
410
                                         block_rd_txfm, &args);
  if (args.skip) {
411
412
413
414
415
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
416
417
418
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
419
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
420
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
421
422
}

423
424
425
426
427
// Evaluates luma at a single transform size: the smaller of the largest size
// the block size allows and the largest size the frame's tx_mode allows.
// The chosen size is stored in the mode info, and the sse result is written
// to the sse[] entry for that size.
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cm->tx_mode];

  mbmi->tx_size = MIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   sse + mbmi->tx_size, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
  cpi->tx_stepdown_count[0]++;
}

442
443
444
445
446
447
// Picks the luma transform size from per-size results already computed by
// the caller.
//
//   r[n][0]  - in: coefficient rate at size n; r[n][1] is filled in here
//              with that rate plus the cost of signalling size n
//   d[n]     - in: distortion at size n (INT64_MAX if evaluation was cut off)
//   s[n]     - in: non-zero if the block is skippable at size n
//   rate, distortion, skip - out: values for the selected size
//   tx_cache - out: RD cost per TX_MODE
//
// The selected size (stored in the mode info) is the best-RD size when the
// frame uses TX_MODE_SELECT, otherwise the largest size allowed by both the
// block size and the frame's tx_mode.
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                   int (*r)[2], int *rate,
                                   int64_t *d, int64_t *distortion,
                                   int *s, int *skip,
                                   int64_t tx_cache[TX_MODES],
                                   BLOCK_SIZE bs) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  const int tx_select = (cm->tx_mode == TX_MODE_SELECT);
  const vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = TX_4X4;
  TX_SIZE select_tx;
  TX_SIZE n, m;
  int no_skip_cost, skip_cost;

  assert(skip_prob > 0);
  no_skip_cost = vp9_cost_bit(skip_prob, 0);
  skip_cost = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_tx_size; n++) {
    // Rate including the transform size signalling: a 'one' for every
    // smaller size and a terminating 'zero', which is omitted at the
    // largest size.
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      for (m = 0; m <= n - (n == max_tx_size); m++) {
        r[n][1] += (m == n) ? vp9_cost_zero(tx_probs[m])
                            : vp9_cost_one(tx_probs[m]);
      }
    }

    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      // Skippable: only the skip flag is paid for.
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, skip_cost, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + no_skip_cost, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + no_skip_cost, d[n]);
    }

    if (rd[n][1] < best_rd) {
      best_rd = rd[n][1];
      best_tx = n;
    }
  }

  mbmi->tx_size = tx_select ? best_tx : MIN(max_tx_size, max_mode_tx_size);

  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][tx_select];
  *skip       = s[mbmi->tx_size];

  tx_cache[ONLY_4X4]    = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8]   = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];

  // Size credited to TX_MODE_SELECT; the step-down counter is indexed by how
  // far that size lies below the block's maximum.
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32)
    select_tx = TX_32X32;
  else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16)
    select_tx = TX_16X16;
  else if (rd[TX_8X8][1] < rd[TX_4X4][1])
    select_tx = TX_8X8;
  else
    select_tx = TX_4X4;

  tx_cache[TX_MODE_SELECT] = rd[select_tx][1];
  cpi->tx_stepdown_count[max_tx_size - select_tx]++;
}
519

520
521
522
523
524
// Luma rate/distortion for an inter block. The luma residual is computed
// first. Then either only the largest allowed transform size is evaluated
// (USE_LARGESTALL search method or lossless; txfm_cache is zeroed), or every
// size up to the block's maximum is evaluated and choose_tx_size_from_rd()
// picks one. If psse is non-NULL it receives the sse at the selected size.
static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t sse[TX_SIZES];

  assert(bs == mbmi->sb_type);

  vp9_subtract_plane(x, bs, 0);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_tx_size(cpi, x, rate, distortion, skip, sse, ref_best_rd,
                           bs);
  } else {
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    int r[TX_SIZES][2], s[TX_SIZES];
    int64_t d[TX_SIZES];
    TX_SIZE tx_size;

    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) {
      txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size],
                       &sse[tx_size], ref_best_rd, 0, bs, tx_size,
                       cpi->sf.use_fast_coef_costing);
    }
    choose_tx_size_from_rd(cpi, x, r, rate, d, distortion, s,
                           skip, txfm_cache, bs);
  }

  if (psse)
    *psse = sse[mbmi->tx_size];
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
555

556
557
558
559
560
561
562
// Luma rate/distortion for an intra block. Unless the search method is
// USE_FULL_RD and the block is not lossless, only the largest allowed
// transform size is evaluated (txfm_cache is zeroed); otherwise every size
// up to the block's maximum is evaluated and choose_tx_size_from_rd() picks
// one. If psse is non-NULL it receives the sse at the selected size.
static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                  int64_t *distortion, int *skip,
                                  int64_t *psse, BLOCK_SIZE bs,
                                  int64_t txfm_cache[TX_MODES],
                                  int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  TX_SIZE tx_size;

  assert(bs == mbmi->sb_type);

  if (cpi->sf.tx_size_search_method != USE_FULL_RD || xd->lossless) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_tx_size(cpi, x, rate, distortion, skip, sse, ref_best_rd,
                           bs);
    if (psse)
      *psse = sse[mbmi->tx_size];
    return;
  }

  for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) {
    txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
                     &s[tx_size], &sse[tx_size],
                     ref_best_rd, 0, bs, tx_size,
                     cpi->sf.use_fast_coef_costing);
  }
  choose_tx_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                         bs);

  if (psse)
    *psse = sse[mbmi->tx_size];
}


587
588
// Returns 1 if the oblique intra mode `mode` should be skipped because the
// best intra mode found so far is not one of the two modes it is paired
// with below; returns 0 otherwise, including for all modes not listed.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

608
// Searches the intra prediction modes DC_PRED..TM_PRED for one sub-block
// (4x4, 4x8 or 8x4, given by bsize) of an 8x8 luma block and keeps the one
// with the lowest RD cost.
//
//   ib          - index (0..3) of the sub-block's first 4x4 block
//   best_mode   - in/out: best mode so far; read by the direction-mismatch
//                 skip test and overwritten when a better mode is found
//   bmode_costs - cost of signalling each mode, indexed by mode
//   a, l        - in/out: above / left entropy contexts; updated to the
//                 contexts produced by the best mode
//   bestrate, bestratey, bestdistortion - out: total rate, coefficient rate
//                 and distortion of the best mode; written only when some
//                 mode beats rd_thresh
//   rd_thresh   - RD cost a mode has to beat
//
// Returns the best RD cost found, or rd_thresh if no mode beat it. When a
// mode was found and x->skip_encode is not set, the reconstruction of the
// best mode is restored into the destination buffer before returning.
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;

  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
  // ta/tl: contexts on entry; tempa/templ: scratch copies for each mode.
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  // Saved reconstruction of the best mode so far (row stride 8).
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi[0]->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    // Skip modes disabled by the speed features for 4x4.
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
          continue;
    }

    // Every mode starts from the entropy contexts seen on entry.
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = ib + idy * 2 + idx;
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
        int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        // With skip_encode set, the source buffer is passed as the
        // prediction reference instead of the destination buffer.
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, idx, idy, 0);
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          // Lossless path: Walsh-Hadamard transform; no distortion is
          // accumulated in this branch.
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          // Abandon this mode as soon as it cannot beat the best so far.
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                             so->scan, so->neighbors,
                             cpi->sf.use_fast_coef_costing);
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          // Abandon this mode as soon as it cannot beat the best so far.
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      // Save this reconstruction; later modes overwrite the destination.
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  // Nothing beat the threshold, or skip_encode is set: the destination
  // buffer is not restored.
  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  // Put the best mode's reconstruction back into the destination buffer.
  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}

735
736
737
// Picks an intra prediction mode for every sub-block (4x4, 4x8 or 8x4) of the
// current 8x8 block and accumulates the resulting rate and distortion.
//
//   cpi         encoder instance; supplies the frame type and mode cost tables.
//   mb          macroblock being coded. The chosen modes are written to
//               mb->e_mbd.mi[0] (bmi[] entries and mbmi.mode).
//   rate        out: sum of the per-sub-block rates.
//   rate_y      out: sum of the per-sub-block "ratey" values reported by
//               rd_pick_intra4x4block().
//   distortion  out: sum of the per-sub-block distortions.
//   best_rd     RD budget; the search is abandoned as soon as the accumulated
//               cost reaches it.
//
// Returns the RD cost of the combined rate/distortion, or INT64_MAX if the
// budget was exhausted. On the INT64_MAX path *rate, *rate_y and *distortion
// are left untouched, but bmi[] entries of sub-blocks that were already
// searched may have been updated.
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
  const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  // Default cost table; replaced per sub-block on key frames (see below).
  const int *bmode_costs = cpi->mbmode_cost;

  // Work on local copies of the luma entropy contexts so that the search
  // does not modify the contexts stored in xd.
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      // Raster index of this sub-block within the 2x2 grid of 4x4 units.
      const int i = idy * 2 + idx;
      int j;

      if (cpi->common.frame_type == KEY_FRAME) {
        // On key frames the mode cost table is selected by the modes that
        // vp9_above_block_mode()/vp9_left_block_mode() report for block i.
        const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
        const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);

        bmode_costs = cpi->y_mode_costs[A][L];
      }

      // Only the part of the budget not yet consumed by earlier sub-blocks
      // is handed to the per-block search.
      this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                      t_above + idx, t_left + idy, &r, &ry, &d,
                                      bsize, best_rd - total_rd);
      if (this_rd >= best_rd - total_rd)
        return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      // Record the winner for this sub-block and replicate it into the other
      // 4x4 entries it covers (the one below for 4x8, the one to the right
      // for 8x4).
      mic->bmi[i].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_high; ++j)
        mic->bmi[i + j * 2].as_mode = best_mode;
      for (j = 1; j < num_4x4_blocks_wide; ++j)
        mic->bmi[i + j].as_mode = best_mode;

      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;
  // The block-level mode mirrors the mode of the last (bottom-right) 4x4.
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
}
801

802
803
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
804
                                      int64_t *distortion, int *skippable,
805
                                      BLOCK_SIZE bsize,
806
                                      int64_t tx_cache[TX_MODES],
807
                                      int64_t best_rd) {
808
809
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
Jim Bankoski's avatar
Jim Bankoski committed
810
  MACROBLOCKD *const xd = &x->e_mbd;
811
  MODE_INFO *const mic = xd->mi[0];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
812
  int this_rate, this_rate_tokenonly, s;
813
  int64_t this_distortion, this_rd;
814
  TX_SIZE best_tx = TX_4X4;
815
  int i;
816
  int *bmode_costs = cpi->mbmode_cost;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
817

818
  if (cpi->sf.tx_size_search_method == USE_FULL_RD)
819
    for (i = 0; i < TX_MODES; i++)
820
      tx_cache[i] = INT64_MAX;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
821

822
  /* Y Search for intra prediction mode */
Ronald S. Bultje's avatar
Ronald S. Bultje committed
823
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
824
    int64_t local_tx_cache[TX_MODES];
825
826
    MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
    MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
827

828
    if (cpi->common.frame_type == KEY_FRAME) {
829
830
      const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
      const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
831

832
      bmode_costs = cpi->y_mode_costs[A][L];
833
    }
834
    mic->mbmi.mode = mode;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
835

836
837
    intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
        &s, NULL, bsize, local_tx_cache, best_rd);
838
839
840

    if (this_rate_tokenonly == INT_MAX)
      continue;
Jim Bankoski's avatar
Jim Bankoski committed
841

842
    this_rate = this_rate_tokenonly + bmode_costs[mode];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
843
844
845
846
847
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
848
      best_tx         = mic->mbmi.tx_size;
Ronald S. Bultje's avatar