vp9_rdopt.c 158 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
12
#include <math.h>
13

14
#include "./vp9_rtcd.h"
15
#include "./vpx_dsp_rtcd.h"
16

17
#include "vpx_dsp/vpx_dsp_common.h"
18
#include "vpx_mem/vpx_mem.h"
19
#include "vpx_ports/mem.h"
20
#include "vpx_ports/system_state.h"
21
22
23

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
24
#include "vp9/common/vp9_entropymode.h"
25
26
27
28
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
29
30
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
Scott LaVarnway's avatar
Scott LaVarnway committed
31
#include "vp9/common/vp9_scan.h"
32
33
#include "vp9/common/vp9_seg_common.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
34
#include "vp9/encoder/vp9_cost.h"
35
#include "vp9/encoder/vp9_encodemb.h"
36
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
37
#include "vp9/encoder/vp9_encoder.h"
38
#include "vp9/encoder/vp9_mcomp.h"
39
#include "vp9/encoder/vp9_quantize.h"
40
#include "vp9/encoder/vp9_ratectrl.h"
41
#include "vp9/encoder/vp9_rd.h"
42
#include "vp9/encoder/vp9_rdopt.h"
43
#include "vp9/encoder/vp9_aq_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

45
46
47
48
49
50
51
52
// Bit masks over reference-frame identifiers: bit n stands for frame n.
// Each *_MODE_MASK below has the bits of every frame type set EXCEPT the
// frame named in the macro.

// Bits for GOLDEN_FRAME, ALTREF_FRAME and INTRA_FRAME (everything but LAST).
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
// Bits for LAST_FRAME, ALTREF_FRAME and INTRA_FRAME (everything but GOLDEN).
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
// Bits for LAST_FRAME, GOLDEN_FRAME and INTRA_FRAME (everything but ALTREF).
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// ALTREF_FRAME bit plus bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME == 0 -- confirm
// against the MV_REFERENCE_FRAME definition.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
53

Paul Wilkins's avatar
Paul Wilkins committed
54
#define MIN_EARLY_TERM_INDEX    3
55
#define NEW_MV_DISCOUNT_FACTOR  8
Paul Wilkins's avatar
Paul Wilkins committed
56

57
typedef struct {
58
  PREDICTION_MODE mode;
59
60
61
62
63
64
65
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference search order: a pair of reference frames.
// In the table built from this type the second element is NONE for
// single-reference entries.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
66
67
68
69
70
71
72
73
74
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
Alex Converse's avatar
Alex Converse committed
75
  int exit_early;
76
  int use_fast_coef_costing;
77
  const scan_order *so;
78
  uint8_t skippable;
Alex Converse's avatar
Alex Converse committed
79
80
};

81
#define LAST_NEW_MV_INDEX 6
82
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
83
84
85
86
87
88
89
90
91
92
93
94
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
95
96
97
98
99
100
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
122
123
};

124
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
125
126
127
128
129
130
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
131
132
};

133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
// Exchanges the coefficient buffer sets held in slots |m| and |n| of |ctx|
// for every plane in [min_plane, max_plane).
//
// The live plane pointers in |x| are used as the temporaries for the swap,
// so on return they reference the buffers that were in slot |m| on entry
// (and are now stored in slot |n|).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Transform coefficients.
    mb_plane->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = mb_plane->coeff;

    // Quantized coefficients.
    mb_plane->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = mb_plane->qcoeff;

    // Dequantized coefficients (these live on the decoder-side plane).
    mbd_plane->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = mbd_plane->dqcoeff;

    // End-of-block positions.
    mb_plane->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = mb_plane->eobs;
  }
}

158
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
159
                            MACROBLOCK *x, MACROBLOCKD *xd,
160
161
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
Deb Mukherjee's avatar
Deb Mukherjee committed
162
163
164
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
165
166
167
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
Scott LaVarnway's avatar
Scott LaVarnway committed
168
  const int ref = xd->mi[0]->ref_frame[0];
169
  unsigned int sse;
170
  unsigned int var = 0;
171
  unsigned int sum_sse = 0;
172
173
  int64_t total_sse = 0;
  int skip_flag = 1;
174
  const int shift = 6;
175
176
  int rate;
  int64_t dist;
177
178
179
180
181
182
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
          xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
          3;
183
184

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
185
186
187
188

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
189
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
190
191
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
192
193
194
195
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
196
197
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
198
199
200
201
202
203
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

204
    sum_sse = 0;
205
206
207
208
209

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
210
        int block_idx = (idy << 1) + idx;
211
        int low_err_skip = 0;
212
213
214
215
216
217

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

218
        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
219
        if (!x->select_tx_size) {
220
          // Check if all ac coefficients can be quantized to zero.
221
          if (var < ac_thr || var == 0) {
222
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;
223
224

            // Check if dc coefficient can be quantized to zero.
225
            if (sse - var < dc_thr || sse == var) {
226
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;
227
228
229
230

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
231
          }
232
        }
233

234
235
236
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

237
238
239
240
        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
241

242
243
    total_sse += sum_sse;

244
    // Fast approximate the modelling function.
245
    if (cpi->sf.simple_model_rd_from_var) {
246
      int64_t rate;
247
      const int64_t square_error = sum_sse;
248
      int quantizer = (pd->dequant[1] >> dequant_shift);
249

250
      if (quantizer < 120)
251
        rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT);
252
253
254
255
256
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
257
    } else {
258
      vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
259
260
                                   pd->dequant[1] >> dequant_shift,
                                   &rate, &dist);
261
      rate_sum += rate;
262
      dist_sum += dist;
263
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
264
265
  }

266
267
  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
268
269
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
270
271
}

272
273
274
275
276
#if CONFIG_VP9_HIGHBITDEPTH
// Computes the sum of squared differences between |coeff| and |dqcoeff| over
// |block_size| entries, and the sum of squared |coeff| values.
//
// Both sums are rounded and shifted right by 2 * (bd - 8) so that the result
// is expressed on the 8-bit scale. The squared-coefficient sum is written to
// *ssz; the squared-error sum is returned.
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  int rounding = 0;
  int64_t sum_sq_err = 0;
  int64_t sum_sq_coeff = 0;
  int k;

  // Half of the divisor, for round-to-nearest; nothing to add at 8 bits.
  if (shift > 0)
    rounding = 1 << (shift - 1);

  for (k = 0; k < block_size; k++) {
    const int64_t delta = coeff[k] - dqcoeff[k];
    sum_sq_err += delta * delta;
    sum_sq_coeff += (int64_t)coeff[k] * (int64_t)coeff[k];
  }
  assert(sum_sq_err >= 0 && sum_sq_coeff >= 0);

  *ssz = (sum_sq_coeff + rounding) >> shift;
  return (sum_sq_err + rounding) >> shift;
}

295
296
297
298
int64_t vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
                                      const tran_low_t *dqcoeff,
                                      intptr_t block_size,
                                      int64_t *ssz) {
299
300
301
302
303
  // Note that the C versions of these 2 functions (vp9_block_error and
  // vp9_highbd_block_error_8bit are the same, but the optimized assembly
  // routines are not compatible in the non high bitdepth configuration, so
  // they still cannot share the same name.
  return vp9_block_error_c(coeff, dqcoeff, block_size, ssz);
304
}
305

306
307
308
309
310
311
312
313
314
315
316
317
318
319
// Selects the block-error routine by bit depth: every depth other than 8
// goes through the generic high-bit-depth routine, while 8-bit input uses
// the dedicated 8-bit variant.
static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
                                               const tran_low_t *dqcoeff,
                                               intptr_t block_size,
                                               int64_t *ssz, int bd) {
  if (bd != 8)
    return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd);

  return vp9_highbd_block_error_8bit(coeff, dqcoeff, block_size, ssz);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Computes the sum of squared differences between |coeff| and |dqcoeff| over
// |block_size| entries, and the sum of squared |coeff| values.
//
// The squared-coefficient sum is written to *ssz; the squared-error sum is
// returned. All products are formed in 64 bits so that large coefficient
// values cannot overflow a signed int (which would be undefined behaviour).
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    // Widen before subtracting and multiplying.
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
332
333
334
335
336
337
338
339
340
341
342
343
344

// Returns the sum of squared differences between |coeff| and |dqcoeff| over
// |block_size| 16-bit entries. A non-positive |block_size| yields 0.
//
// The difference of two int16_t values can reach +/-65535, whose square does
// not fit in a 32-bit int, so the square is formed in 64 bits.
int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
345

346
347
348
349
350
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
351
// Number of coefficients in each successive band, one row per transform size
// in increasing size order. The non-zero entries of the rows sum to 16, 64,
// 256 and 1024 respectively; the trailing 0 is the terminator described
// above.
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4, 3, 3, 0 },      // last band: 16 - 13
  { 1, 2, 3, 4, 11, 43, 0 },    // last band: 64 - 21
  { 1, 2, 3, 4, 11, 235, 0 },   // last band: 256 - 21
  { 1, 2, 3, 4, 11, 1003, 0 },  // last band: 1024 - 21
};
Alex Converse's avatar
Alex Converse committed
357
358
359
360
361
362
// Returns the token cost of the quantized coefficients of one transform
// block.
//
//   x, plane, block   identify the block whose qcoeff/eob are costed.
//   A, L              above/left entropy contexts; read to form the initial
//                     context and both overwritten with (eob > 0) on return.
//   tx_size           transform size; must agree with the mode info.
//   scan, nb          scan order and its neighbour table.
//   use_fast_coef_costing
//                     when non-zero, the context of each token is derived
//                     from the previous token only instead of from the
//                     neighbour energy cache.
static int cost_coeffs(MACROBLOCK *x,
                       int plane, int block,
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                       TX_SIZE tx_size,
                       const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const struct macroblock_plane *p = &x->plane[plane];
  const PLANE_TYPE type = get_plane_type(plane);
  // Entry 0 of the band table is the DC band, which is handled explicitly
  // below, so band tracking starts from entry 1.
  const int16_t *next_band = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mi)];
  uint8_t token_cache[32 * 32];
  int ctx = combine_entropy_contexts(*A, *L);
  int pos = 0;
  int total;
#if CONFIG_VP9_HIGHBITDEPTH
  const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const int *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mi->tx_size == tx_size :
         get_uv_tx_size(mi, &xd->plane[plane]) == tx_size);

  if (eob == 0) {
    // Empty block: a single end-of-block token.
    total = token_costs[0][0][ctx][EOB_TOKEN];
  } else {
    int remaining_in_band = *next_band++;
    int coef = qcoeff[0];
    int16_t prev_tok;
    EXTRABIT extra;

    // DC token.
    vp9_get_token_extra(coef, &prev_tok, &extra);
    total = (*token_costs)[0][ctx][prev_tok] +
            vp9_get_cost(prev_tok, extra, cat6_high_cost);
    token_cache[0] = vp9_pt_energy_class[prev_tok];
    ++token_costs;

    // AC tokens, in scan order.
    for (pos = 1; pos < eob; pos++) {
      const int rc = scan[pos];
      int16_t tok;
      int tok_ctx;

      coef = qcoeff[rc];
      vp9_get_token_extra(coef, &tok, &extra);

      if (use_fast_coef_costing)
        tok_ctx = !prev_tok;
      else
        tok_ctx = get_coef_context(nb, token_cache, pos);

      total += (*token_costs)[!prev_tok][tok_ctx][tok] +
               vp9_get_cost(tok, extra, cat6_high_cost);

      // The energy cache is only maintained when it is actually consulted.
      if (!use_fast_coef_costing)
        token_cache[rc] = vp9_pt_energy_class[tok];

      prev_tok = tok;

      // Move on to the next band's cost table once this band is used up.
      if (!--remaining_in_band) {
        remaining_in_band = *next_band++;
        ++token_costs;
      }
    }

    // End-of-block token; omitted when the band terminator (0) was reached,
    // i.e. the block is completely full.
    if (remaining_in_band) {
      const int eob_ctx = use_fast_coef_costing
                              ? !prev_tok
                              : get_coef_context(nb, token_cache, pos);
      total += (*token_costs)[0][eob_ctx][EOB_TOKEN];
    }
  }

  // Record whether the block has any coefficient at all.
  *A = *L = (pos > 0);

  return total;
}
442

Alex Converse's avatar
Alex Converse committed
443
444
// Computes the transform-domain distortion and source energy of one
// transform block.
//
//   *out_dist  squared error between coeff and dqcoeff, scaled down by
//              |shift| (2 for all sizes except 32x32, which uses 0).
//   *out_sse   squared sum of coeff, scaled the same way.
//
// When x->skip_encode is set and the block is intra, a quantizer-dependent
// bias is added to both outputs.
static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  const int ss_txfrm_size = tx_size << 1;
  MACROBLOCKD* const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  int64_t this_sse;
  int shift = tx_size == TX_32X32 ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
  *out_dist = vp9_highbd_block_error_dispatch(coeff, dqcoeff,
                                              16 << ss_txfrm_size,
                                              &this_sse, bd) >> shift;
#else
  *out_dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                              &this_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  *out_sse = this_sse >> shift;

  if (x->skip_encode && !is_inter_block(xd->mi[0])) {
    // TODO(jingning): tune the model to better capture the distortion.
    // The product is formed in 64 bits: squaring a large dequantizer step
    // and scaling it by the block size can exceed the range of int. The
    // local is named so that it does not hide the plane pointer above.
    const int64_t ac_step = pd->dequant[1];
    const int64_t bias = (ac_step * ac_step * (1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
                         (shift + 2 + (bd - 8) * 2);
#else
                         (shift + 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_dist += (bias >> 4);
    *out_sse  += bias;
  }
}

Alex Converse's avatar
Alex Converse committed
478
479
// Returns the coefficient token cost of one transform block, using the
// entropy contexts in |args| at the block's raster position within the
// plane block.
static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
                      TX_SIZE tx_size, struct rdcost_block_args* args) {
  int col, row;
  ENTROPY_CONTEXT *above_ctx;
  ENTROPY_CONTEXT *left_ctx;

  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &col, &row);
  above_ctx = args->t_above + col;
  left_ctx = args->t_left + row;

  return cost_coeffs(args->x, plane, block, above_ctx, left_ctx, tx_size,
                     args->so->scan, args->so->neighbors,
                     args->use_fast_coef_costing);
}

489
490
// Per-transform-block callback: measures rate, distortion and SSE of one
// block and adds them to the running totals in |arg| (a
// struct rdcost_block_args). Sets exit_early, and stops accumulating, as
// soon as the running RD cost exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int64_t rd_coded, rd_zeroed, rd;
  int rate;
  int64_t dist;
  int64_t sse;

  if (args->exit_early)
    return;

  if (!is_inter_block(mi)) {
    // Intra: predict, transform and reconstruct this block first.
    struct encode_b_args intra_args = {x, NULL, &mi->skip};
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &intra_args);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  } else if (max_txsize_lookup[plane_bsize] != tx_size) {
    // Inter, not the largest transform size: no skip hint applies, so do
    // the full forward transform and quantization.
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
    dist_block(x, plane, block, tx_size, &dist, &sse);
  } else {
    // Inter at the largest transform size: honour the skip hint stored for
    // this unit.
    const int unit = (plane << 2) + (block >> (tx_size << 1));

    if (x->skip_txfm[unit] == SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[unit] == SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      sse  = x->bsse[unit] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        // Credit the part of the DC energy that the quantized DC removes.
        const int64_t dc_energy = (int64_t)coeff[0] * coeff[0];
        const int64_t dc_resid = coeff[0] - dqcoeff[0];
        int64_t dc_correct = dc_energy - dc_resid * dc_resid;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse  = x->bsse[unit] << 4;
      dist = sse;
    }
  }

  // Distortion alone already over budget: no need to cost the tokens.
  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, plane_bsize, tx_size, args);
  rd_coded = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd_zeroed = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd_coded, rd_zeroed);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd_coded > rd_zeroed && !xd->lossless);

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}

575
static void txfm_rd_in_plane(MACROBLOCK *x,
576
577
578
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
579
580
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
581
  MACROBLOCKD *const xd = &x->e_mbd;
582
  const struct macroblockd_plane *const pd = &xd->plane[plane];
583
584
  struct rdcost_block_args args;
  vp9_zero(args);
585
586
  args.x = x;
  args.best_rd = ref_best_rd;
587
  args.use_fast_coef_costing = use_fast_coef_casting;
588
  args.skippable = 1;
589

590
  if (plane == 0)
Scott LaVarnway's avatar
Scott LaVarnway committed
591
    xd->mi[0]->tx_size = tx_size;
592

593
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
594

595
  args.so = get_scan(xd, tx_size, get_plane_type(plane), 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
596

597
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
598
                                         block_rd_txfm, &args);
Alex Converse's avatar
Alex Converse committed
599
  if (args.exit_early) {
600
601
602
603
604
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
605
606
607
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
608
    *skippable  = args.skippable;
609
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
610
611
}

612
613
614
615
616
// Costs the luma plane using the largest transform size permitted by both
// the block size |bs| and the frame's transform mode, and stores that size
// in the mode info. Outputs are those of txfm_rd_in_plane().
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cm->tx_mode];

  mi->tx_size = VPXMIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* luma plane */, bs, mi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

630
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
631
632
633
634
635
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
                                   int64_t ref_best_rd,
636
                                   BLOCK_SIZE bs) {
637
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
638
639
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
Scott LaVarnway's avatar
Scott LaVarnway committed
640
  MODE_INFO *const mi = xd->mi[0];
Yaowu Xu's avatar
Yaowu Xu committed
641
  vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
642
643
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
644
645
646
647
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
648
  int n, m;
649
  int s0, s1;
650
  int64_t best_rd = INT64_MAX;
651
  TX_SIZE best_tx = max_tx_size;
652
  int start_tx, end_tx;
653

Yaowu Xu's avatar
Yaowu Xu committed
654
  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
655
656
657
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
658

659
660
661
662
  if (cm->tx_mode == TX_MODE_SELECT) {
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
663
664
    TX_SIZE chosen_tx_size = VPXMIN(max_tx_size,
                                    tx_mode_to_biggest_tx_size[cm->tx_mode]);
665
666
667
668
669
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  for (n = start_tx; n >= end_tx; n--) {
670
671
672
673
674
675
676
    int r_tx_size = 0;
    for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
      if (m == n)
        r_tx_size += vp9_cost_zero(tx_probs[m]);
      else
        r_tx_size += vp9_cost_one(tx_probs[m]);
    }
677
678
679
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
680
681
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
682
      r[n][1] += r_tx_size;
683
    }
684
    if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
685
      rd[n][0] = rd[n][1] = INT64_MAX;
686
    } else if (s[n]) {
Scott LaVarnway's avatar
Scott LaVarnway committed
687
      if (is_inter_block(mi)) {
688
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
689
        r[n][1] -= r_tx_size;
690
691
692
693
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
      }
694
695
696
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
697
    }
698

Scott LaVarnway's avatar
Scott LaVarnway committed
699
    if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
700
701
702
703
      rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
      rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
    }

704
705
706
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
707
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
708
709
710
        s[n] == 1))
      break;

711
712
713
714
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
715
  }
Scott LaVarnway's avatar
Scott LaVarnway committed
716
  mi->tx_size = best_tx;
717

Scott LaVarnway's avatar
Scott LaVarnway committed
718
719
720
721
  *distortion = d[mi->tx_size];
  *rate       = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mi->tx_size];
  *psse       = sse[mi->tx_size];
Deb Mukherjee's avatar
Deb Mukherjee committed
722
}
723

724
725
726
727
// Dispatches to one of the two transform-size selection routines for the
// block and forwards their results through rate, distortion, skip and psse.
//
// psse may be NULL; in that case the SSE result is written to a local
// scratch variable and discarded. bs is asserted to equal the sb_type of
// xd->mi[0]. ref_best_rd is passed through unchanged to the callee.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t scratch_sse;
  int64_t *sse_out = psse;

  // Callers that do not need the SSE pass NULL; give the callee somewhere
  // valid to write anyway.
  if (!sse_out)
    sse_out = &scratch_sse;

  assert(bs == xd->mi[0]->sb_type);

  // The RD-based search is used only when the speed feature does not force
  // the largest transform and the block is not coded losslessly.
  if (cpi->sf.tx_size_search_method != USE_LARGESTALL && !xd->lossless) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out,
                           ref_best_rd, bs);
    return;
  }

  choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out, ref_best_rd,
                         bs);
}

743
744
// Returns 1 when the candidate intra mode should be skipped given the best
// intra mode found so far, 0 otherwise.
//
// Only four oblique modes can be skipped. Each one is kept when
// best_intra_mode is one of the two modes listed for it below, and skipped
// for any other best_intra_mode:
//   D117_PRED: V_PRED or D135_PRED
//   D63_PRED:  V_PRED or D45_PRED
//   D207_PRED: H_PRED or D45_PRED
//   D153_PRED: H_PRED or D135_PRED
// Every other candidate mode is never skipped by this function.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

764
765
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
                                     int row, int col,
766
                                     PREDICTION_MODE *best_mode,
767
                                     const int *bmode_costs,
768
769
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
770
                                     int64_t *bestdistortion,
771
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
772
  PREDICTION_MODE mode;
773
  MACROBLOCKD *const xd = &x->e_mbd;
774
  int64_t best_rd = rd_thresh;
775
776
777
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
778
  const int dst_stride = pd->dst.stride;
779
780
  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
781
782
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Jim Bankoski's avatar
Jim Bankoski committed
783
784
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
785
  int idx, idy;
786
  uint8_t best_dst[8 * 8];
787
788
789
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
790

James Zern's avatar
James Zern committed
791
792
  memcpy(ta, a, sizeof(ta));
  memcpy(tl, l, sizeof(tl));
Scott LaVarnway's avatar
Scott LaVarnway committed
793
  xd->mi[0]->tx_size = TX_4X4;
794

795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

James Zern's avatar
James Zern committed
813
814
      memcpy(tempa, ta, sizeof(ta));
      memcpy(templ, tl, sizeof(tl));
815
816
817

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
818
          const int block = (row + idy) * 2 + (col + idx);
819
820
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
821
822
823
          int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8,
                                                                  block,
                                                                  p->src_diff);
824
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
825
          xd->mi[0]->bmi[block].as_mode = mode;
826
          vp9_predict_intra_block(xd, 1, TX_4X4, mode,
827
828
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
829
830
                                  dst, dst_stride,
                                  col + idx, row + idy, 0);
831
          vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
832
                                    dst, dst_stride, xd->bd);
833
834
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
835
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
836
837
838
839
840
841
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
842
843
844
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                   dst, dst_stride,
                                   p->eobs[block], xd->bd);
845
846
847
848
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
849
            if (tx_type == DCT_DCT)
850
              vpx_highbd_fdct4x4(src_diff, coeff, 8);
851
852
            else
              vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
853
854
855
856
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
857
            distortion += vp9_highbd_block_error_dispatch(