vp9_rdopt.c 153 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
12
#include <math.h>
13

14
15
16
17
18
19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21
22
23
24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25
26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27
28
29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38
39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
// RD threshold tuning constants.
// NOTE(review): neither macro is referenced in the part of the file reviewed
// here; presumably they bound/step an adaptive threshold factor - confirm
// against the users further down the file.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
43

44
45
46
47
48
49
50
51
// Per-reference bit masks, one bit per MV_REFERENCE_FRAME value.
// Each *_MODE_MASK has the bits of every reference among
// {LAST, GOLDEN, ALTREF, INTRA} set EXCEPT the one named by the macro.
// NOTE(review): presumably used to disable all references other than the
// named one - confirm at the use sites, which are outside this view.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// Bit for ALTREF_FRAME plus bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME - confirm.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
52

Paul Wilkins's avatar
Paul Wilkins committed
53
54
// NOTE(review): not referenced in the part of the file reviewed here;
// presumably the lowest mode index at which early termination of the mode
// search may trigger - confirm at the use site.
#define MIN_EARLY_TERM_INDEX    3

55
typedef struct {
56
  PREDICTION_MODE mode;
57
58
59
60
61
62
63
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference search order: a pair of reference frames.
// Entries in vp9_ref_order use NONE in ref_frame[1] when only one reference
// is involved.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
64
65
66
67
68
69
70
71
72
73
74
75
76
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
77
  int use_fast_coef_costing;
78
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
79
80
};

81
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
82
83
84
85
86
87
88
89
90
91
92
93
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
94
95
96
97
98
99
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
121
122
};

123
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
124
125
126
127
128
129
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
130
131
};

132
133
// Converts a raster-order block index into a sample offset inside a buffer
// with the given stride. The index is split into a row and a column using
// b_width_log2_lookup[plane_bsize] as the number of column bits; each row
// and column step covers 4 samples.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int col_bits = b_width_log2_lookup[plane_bsize];
  const int col_mask = (1 << col_bits) - 1;
  const int row_px = 4 * (raster_block >> col_bits);
  const int col_px = 4 * (raster_block & col_mask);

  return row_px * stride + col_px;
}
// Returns a pointer to the given raster-order block inside an int16_t
// buffer whose stride is 4 * num_4x4_blocks_wide_lookup[plane_bsize]
// elements.
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         row_stride);

  return &base[offset];
}

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// Exchanges the coefficient buffer sets held in slots m and n of ctx for
// every plane in [min_plane, max_plane).
//
// Side effect: the plane's live coeff / qcoeff / dqcoeff / eobs pointers
// are used as the temporaries of the exchange, so on return they point at
// the buffers that were in slot m before the call (now stored in slot n).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Transform coefficients.
    mb_plane->coeff              = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m]    = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n]    = mb_plane->coeff;

    // Quantized coefficients.
    mb_plane->qcoeff             = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m]   = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n]   = mb_plane->qcoeff;

    // Dequantized coefficients (live pointer is on the decoder-side plane).
    mbd_plane->dqcoeff           = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m]  = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n]  = mbd_plane->dqcoeff;

    // End-of-block positions.
    mb_plane->eobs               = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m]     = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n]     = mb_plane->eobs;
  }
}

170
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
171
                            MACROBLOCK *x, MACROBLOCKD *xd,
172
173
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
Deb Mukherjee's avatar
Deb Mukherjee committed
174
175
176
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
177
178
179
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
180
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
181
  unsigned int sse;
182
  unsigned int var = 0;
183
  unsigned int sum_sse = 0;
184
185
  int64_t total_sse = 0;
  int skip_flag = 1;
186
  const int shift = 6;
187
188
189
190
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
191
192
193
194

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
195
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
196
197
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
198
199
200
201
202
203
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
204
205
206
207
208
209
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

210
    sum_sse = 0;
211
212
213
214
215

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
216
        int block_idx = (idy << 1) + idx;
217
        int low_err_skip = 0;
218
219
220
221
222
223

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

224
        x->skip_txfm[(i << 2) + block_idx] = 0;
225
        if (!x->select_tx_size) {
226
          // Check if all ac coefficients can be quantized to zero.
227
          if (var < ac_thr || var == 0) {
228
            x->skip_txfm[(i << 2) + block_idx] = 2;
229
230

            // Check if dc coefficient can be quantized to zero.
231
            if (sse - var < dc_thr || sse == var) {
232
              x->skip_txfm[(i << 2) + block_idx] = 1;
233
234
235
236

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
237
          }
238
        }
239

240
241
242
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

243
244
245
246
        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
247

248
249
    total_sse += sum_sse;

250
    // Fast approximate the modelling function.
251
    if (cpi->oxcf.speed > 4) {
252
      int64_t rate;
253
      const int64_t square_error = sum_sse;
254
      int quantizer = (pd->dequant[1] >> 3);
255
256
257
258
259
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
260

261
262
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
263
264
265
266
267
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
268
    } else {
269
270
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Deb Mukherjee's avatar
Deb Mukherjee committed
271
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
272
273
274
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
Deb Mukherjee's avatar
Deb Mukherjee committed
275
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
276
277
278
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
279
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
280
                                   pd->dequant[1] >> 3, &rate, &dist);
281
#endif  // CONFIG_VP9_HIGHBITDEPTH
282
      rate_sum += rate;
283
      dist_sum += dist;
284
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
285
286
  }

287
288
  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
289
290
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
291
292
}

293
// Computes the squared error between original and dequantized transform
// coefficients.
//
//   coeff, dqcoeff - arrays of block_size coefficients
//   block_size     - number of coefficients to process
//   ssz            - out: sum of squares of coeff[]
//
// Returns the sum of (coeff[i] - dqcoeff[i])^2.
//
// Every product is formed in 64-bit arithmetic so that large coefficient
// values cannot overflow a signed int before being accumulated.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

308
309

#if CONFIG_VP9_HIGHBITDEPTH
310
311
312
313
// High bit depth variant of the block error computation.
//
// Accumulates the squared coefficient error and the squared coefficient
// energy over block_size entries, then scales both back towards the 8-bit
// range by shifting right by 2 * (bd - 8) with rounding.
//
//   ssz - out: scaled sum of squares of coeff[]
// Returns the scaled sum of (coeff[i] - dqcoeff[i])^2.
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t sq_err_sum = 0;
  int64_t sq_coeff_sum = 0;
  int k;

  for (k = 0; k < block_size; ++k) {
    const int64_t delta = coeff[k] - dqcoeff[k];
    sq_err_sum += delta * delta;
    sq_coeff_sum += (int64_t)coeff[k] * (int64_t)coeff[k];
  }
  assert(sq_err_sum >= 0 && sq_coeff_sum >= 0);

  *ssz = (sq_coeff_sum + rounding) >> shift;
  return (sq_err_sum + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

333
334
335
336
337
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
338
static const int16_t band_counts[TX_SIZES][8] = {
339
340
341
342
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
343
};
344
static INLINE int cost_coeffs(MACROBLOCK *x,
345
                              int plane, int block,
346
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
347
                              TX_SIZE tx_size,
348
349
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
350
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
351
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
352
353
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
354
  const PLANE_TYPE type = pd->plane_type;
355
  const int16_t *band_count = &band_counts[tx_size][1];
356
  const int eob = p->eobs[block];
357
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
358
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
359
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
360
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
361
  int pt = combine_entropy_contexts(*A, *L);
362
  int c, cost;
363
  // Check for consistency of tx_size with mode info
364
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
365
                              : get_uv_tx_size(mbmi, pd) == tx_size);
366

367
368
  if (eob == 0) {
    // single eob token
369
    cost = token_costs[0][0][pt][EOB_TOKEN];
370
    c = 0;
371
  } else {
372
    int band_left = *band_count++;
373
374

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
375
    int v = qcoeff[0];
376
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
377
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
378
    token_cache[0] = vp9_pt_energy_class[prev_t];
379
    ++token_costs;
380
381
382
383

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
384
      int t;
385

Dmitry Kovalev's avatar
Dmitry Kovalev committed
386
      v = qcoeff[rc];
387
      t = vp9_dct_value_tokens_ptr[v].token;
388
389
390
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
391
        pt = get_coef_context(nb, token_cache, c);
392
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
393
        token_cache[rc] = vp9_pt_energy_class[t];
394
      }
395
      prev_t = t;
396
      if (!--band_left) {
397
398
        band_left = *band_count++;
        ++token_costs;
399
      }
400
    }
401
402

    // eob token
403
    if (band_left) {
404
405
406
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
407
        pt = get_coef_context(nb, token_cache, c);
408
409
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
410
    }
411
412
  }

413
  // is eob first coefficient;
414
  *A = *L = (c > 0);
415

416
417
  return cost;
}
418
419
420
421
422

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
423
424
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
425
#endif  // CONFIG_VP9_HIGHBITDEPTH
426
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
427
428
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
429
430
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
431
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
432
  int shift = tx_size == TX_32X32 ? 0 : 2;
433
434
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
435
#if CONFIG_VP9_HIGHBITDEPTH
436
437
  args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
438
#else
439
440
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
441
#endif  // CONFIG_VP9_HIGHBITDEPTH
442
  args->sse  = this_sse >> shift;
443

hkuang's avatar
hkuang committed
444
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
445
446
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
447
                    (1 << ss_txfrm_size)) >> (shift + 2);
448
449
450
451
452
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
453
454
    args->dist += (p >> 4);
    args->sse  += p;
455
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
456
457
}

458
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
459
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
460
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
461
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
462

463
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
464
                           args->t_left + y_idx, tx_size,
465
466
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
467
468
}

469
470
// Per-transform-block callback that accumulates rate, distortion, SSE and
// RD cost into the rdcost_block_args passed through 'arg'.
//
// Does nothing once args->skip is set. Otherwise:
//  - intra blocks are encoded with vp9_encode_block_intra();
//  - inter blocks at the largest transform size consult x->skip_txfm[]:
//      0 -> full transform and quantization,
//      2 -> DC-only transform; distortion is the recorded SSE corrected by
//           the energy removed by the quantized DC coefficient,
//      otherwise -> no transform, eob forced to 0, dist == sse;
//  - all other inter blocks get the full transform and quantization.
// After costing, args->skip is set as soon as the accumulated RD cost
// exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi)) {
    struct encode_b_args intra_args = {x, NULL, &mbmi->skip};
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &intra_args);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
    dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dist_block(plane, block, tx_size, args, xd->bd);
      } else {
        dist_block(plane, block, tx_size, args, 8);
      }
#else
      dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      args->dist = args->sse;
      if (x->plane[plane].eobs[block]) {
        // Energy of the DC coefficient minus the energy left after
        // quantization. All products are formed in 64-bit arithmetic so
        // that large DC values cannot overflow.
        const int64_t orig_dc = coeff[0];
        const int64_t resd_dc = orig_dc - dqcoeff[0];
        int64_t dc_correct = orig_dc * orig_dc - resd_dc * resd_dc;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        args->dist = MAX(0, args->sse - dc_correct);
      }
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      args->dist = args->sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
    dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of dropping them.
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Over budget: make every later invocation of this callback a no-op.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

564
static void txfm_rd_in_plane(MACROBLOCK *x,
565
566
567
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
568
569
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
570
  MACROBLOCKD *const xd = &x->e_mbd;
571
  const struct macroblockd_plane *const pd = &xd->plane[plane];
572
573
  struct rdcost_block_args args;
  vp9_zero(args);
574
575
  args.x = x;
  args.best_rd = ref_best_rd;
576
  args.use_fast_coef_costing = use_fast_coef_casting;
577

578
  if (plane == 0)
hkuang's avatar
hkuang committed
579
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
580

581
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
582

583
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
584

585
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
586
587
                                         block_rd_txfm, &args);
  if (args.skip) {
588
589
590
591
592
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
593
594
595
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
596
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
597
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
598
599
}

600
601
602
603
604
// Selects the largest transform size allowed by both the block size and the
// frame's transform mode, stores it in the mode info, and evaluates the
// luma plane with it. Outputs are those of txfm_rd_in_plane().
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cm->tx_mode];

  mbmi->tx_size = MIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* plane */, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

618
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
619
620
621
622
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
623
                                   int64_t tx_cache[TX_MODES],
624
                                   int64_t ref_best_rd,
625
                                   BLOCK_SIZE bs) {
626
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
627
628
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
629
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
630
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
631
632
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
633
634
635
636
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
637
  int n, m;
638
  int s0, s1;
639
640
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
641
  TX_SIZE best_tx = max_tx_size;
642

643
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
644
645
646
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
647

648
  for (n = max_tx_size; n >= 0;  n--) {
649
650
651
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
652
653
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
654
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
655
656
657
658
659
660
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
661
662
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
663
    } else if (s[n]) {
664
665
666
667
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
668
    }
669

670
671
672
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
673
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
674
675
676
        s[n] == 1))
      break;

677
678
679
680
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
681
  }
682
683
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
684
685


686
687
688
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
689
  *psse       = sse[mbmi->tx_size];
690

691
692
693
694
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
695

696
697
698
699
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
700
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
701
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
702
  } else {
703
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
704
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
705
}
706

707
708
709
710
711
// Luma rate-distortion evaluation for a block of size bs.
//
// Uses the largest permitted transform size when the speed feature asks
// for it or the frame is lossless (txfm_cache is then zeroed); otherwise
// searches the transform sizes by RD cost, which also fills txfm_cache.
//
//   psse - optional; when NULL the SSE result is written to a local and
//          discarded.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t unused_sse;
  int64_t *sse_out;

  if (psse)
    sse_out = psse;
  else
    sse_out = &unused_sse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method != USE_LARGESTALL && !xd->lossless) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out,
                           txfm_cache, ref_best_rd, bs);
    return;
  }

  vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
  choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out,
                         ref_best_rd, bs);
}

728
729
// Decides whether a directional intra mode can be skipped given the best
// intra mode found so far. Each of the four modes below is only kept when
// the current best is one of two listed modes; every other mode is never
// skipped. Returns 1 to skip, 0 to evaluate.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return !(best_intra_mode == V_PRED || best_intra_mode == D135_PRED);
    case D63_PRED:
      return !(best_intra_mode == V_PRED || best_intra_mode == D45_PRED);
    case D207_PRED:
      return !(best_intra_mode == H_PRED || best_intra_mode == D45_PRED);
    case D153_PRED:
      return !(best_intra_mode == H_PRED || best_intra_mode == D135_PRED);
    default:
      return 0;
  }
}

749
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
750
                                     PREDICTION_MODE *best_mode,
751
                                     const int *bmode_costs,
752
753
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
754
                                     int64_t *bestdistortion,
755
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
756
  PREDICTION_MODE mode;
757
  MACROBLOCKD *const xd = &x->e_mbd;
758
  int64_t best_rd = rd_thresh;
759

760
761
762
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
763
  const int dst_stride = pd->dst.stride;
764
765
766
767
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
768
769
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
770

Jim Bankoski's avatar
Jim Bankoski committed
771
772
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
773
  int idx, idy;
774
  uint8_t best_dst[8 * 8];
775
776
777
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
778

Jingning Han's avatar
Jingning Han committed
779
  assert(ib < 4);
780

781
782
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
783
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
784

785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
820
821
          vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                    dst, dst_stride, xd->bd);
822
823
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
824
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
825
826
827
828
829
830
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
831
832
833
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                   dst, dst_stride,
                                   p->eobs[block], xd->bd);
834
835
836
837
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
838
            vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
839
840
841
842
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
843
844
845
            distortion += vp9_highbd_block_error(
                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                16, &unused, xd->bd) >> 2;
846
847
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
848
849
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst, dst_stride, p->eobs[block], xd->bd);
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        vpx_memcpy(a, tempa, sizeof(tempa));
        vpx_memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          vpx_memcpy(best_dst16 + idy * 8,
                     CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                     num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 best_dst16 + idy * 8,
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

887
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
888
    int64_t this_rd;
889
    int ratey = 0;
890
891
    int64_t distortion = 0;
    int rate = bmode_costs[mode];
892

893
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
894
895
      continue;

896
897
    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
898
899
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
900
901
          continue;
    }
902