vp9_rdopt.c 119 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
50
51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
57
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
58

59
60
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
61

62
63
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65
66
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
67

68
69
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
70

71
72
73
74
75
76
77
78
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
79

80
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
84
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
85

86
87
88
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
89

Ronald S. Bultje's avatar
Ronald S. Bultje committed
90
  {I4X4_PRED, INTRA_FRAME,  NONE},
91

John Koleszar's avatar
John Koleszar committed
92
93
94
95
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
96

John Koleszar's avatar
John Koleszar committed
97
98
99
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
100

John Koleszar's avatar
John Koleszar committed
101
102
103
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
104

John Koleszar's avatar
John Koleszar committed
105
106
107
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
108

John Koleszar's avatar
John Koleszar committed
109
110
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
111
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
112
113
};

114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#if CONFIG_BALANCED_COEFTREE
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_count *cnoskip,
                             vp9_coeff_probs_model *p,
                             TX_SIZE tx_size) {
  int i, j, k, l;
  for (i = 0; i < BLOCK_TYPES; i++)
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          vp9_prob probs[ENTROPY_NODES];
          vp9_model_to_full_probs(p[i][j][k][l], probs);
          vp9_cost_tokens((int *)cnoskip[i][j][k][l], probs,
                          vp9_coef_tree);
          // Replace the eob node prob with a very small value so that the
          // cost approximately equals the cost without the eob node
          probs[1] = 1;
          vp9_cost_tokens((int *)c[i][j][k][l], probs, vp9_coef_tree);
        }
}
#else
135
static void fill_token_costs(vp9_coeff_count *c,
136
                             vp9_coeff_probs_model *p,
137
                             TX_SIZE tx_size) {
138
  int i, j, k, l;
139
  for (i = 0; i < BLOCK_TYPES; i++)
140
141
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
142
143
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          vp9_prob probs[ENTROPY_NODES];
144
          vp9_model_to_full_probs(p[i][j][k][l], probs);
145
146
147
          vp9_cost_tokens_skip((int *)c[i][j][k][l], probs,
                               vp9_coef_tree);
        }
148
}
149
#endif
150

151
152
153
154
/* Adjustment factors looked up by cpi->twopass.next_iiratio when RDMULT is
 * boosted in vp9_initialize_rd_consts(). Only the first five entries are
 * non-zero; the remaining 27 are implicitly zero-initialised. */
static int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
155

156
// 3* dc_qlookup[Q]*dc_qlookup[Q];
157

158
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
159
160
161
/* SAD-per-bit factors, one entry per quantizer index. Both tables are
 * populated by vp9_init_me_luts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/* Populates sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer
 * index in [0, QINDEX_RANGE).
 *
 * The entries come from a linear formula of vp9_convert_qindex_to_q()
 * rather than fixed tables, so that the effect of experimental changes to
 * the quantizer tables is easier to resolve. */
void vp9_init_me_luts() {
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[qindex] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
174

175
/* Returns the rate-distortion multiplier for `qindex`: 11/4 of the squared
 * value returned by vp9_dc_quant(qindex, 0), using integer arithmetic. */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
180
181
182
/* Copies the SAD-per-bit factors for `qindex` from the lookup tables into
 * cpi->mb. `qindex` is used as a table index without range checking. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit4 = sad_per_bit4lut[qindex];
  mb->sadperbit16 = sad_per_bit16lut[qindex];
}

185

Dmitry Kovalev's avatar
Dmitry Kovalev committed
186
/* Initialises the rate-distortion constants and cost tables in `cpi` for
 * quantizer index `qindex` (clamped to [0, MAXQ]).
 *
 * Sets cpi->RDMULT / cpi->RDDIV and cpi->mb.errorperbit, refreshes the speed
 * features, derives the per-mode thresholds cpi->rd_threshes[] (mirrored
 * into cpi->rd_baseline_thresh[]), and rebuilds the coefficient token,
 * partition, mode and (for non-key frames) motion vector cost tables. */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // rd_iifactor[] has 32 entries; clamp on both sides so that neither an
    // oversized nor a negative ratio can index outside the table.
    const int iiratio = clamp(cpi->twopass.next_iiratio, 0, 31);
    cpi->RDMULT += (cpi->RDMULT * rd_iifactor[iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Never let errorperbit drop to zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Form the product in 64 bits: the INT_MAX test above only filters
        // the sentinel value and does not keep thresh_mult * q inside the
        // range of int.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = scaled > INT_MAX ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

#if CONFIG_BALANCED_COEFTREE
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->mb.token_costs_noskip[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->mb.token_costs_noskip[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->mb.token_costs_noskip[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->mb.token_costs_noskip[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);
#else
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);
#endif

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

282
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`.
 *
 * The sum is accumulated in 64 bits and saturated to INT_MAX on return.
 * A single squared difference can already exceed INT_MAX (65535^2), so
 * squaring and summing in plain int would overflow. A non-positive
 * `block_size` yields 0. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

293
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
294
                              int plane, int block, PLANE_TYPE type,
295
296
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
297
298
                              TX_SIZE tx_size,
                              int y_blocks) {
299
  MACROBLOCKD *const xd = &mb->e_mbd;
300
301
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
302
  int c = 0;
303
304
  int cost = 0, pad;
  const int *scan, *nb;
305
306
307
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
308
  const int ref = mbmi->ref_frame != INTRA_FRAME;
309
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
310
      mb->token_costs[tx_size][type][ref];
311
  ENTROPY_CONTEXT above_ec, left_ec;
312
  TX_TYPE tx_type = DCT_DCT;
313

314
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
315
316
317
318
#if CONFIG_BALANCED_COEFTREE
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];
#else
319
  vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
320
321
#endif

322
323
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
324
  const uint8_t * band_translate;
325
326

  // Check for consistency of tx_size with mode info
327
  assert((!type && !plane) || (type && plane));
328
329
330
331
332
333
334
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

335
  switch (tx_size) {
336
    case TX_4X4: {
337
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
338
          get_tx_type_4x4(xd, block) : DCT_DCT;
339
340
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
341
#if !CONFIG_BALANCED_COEFTREE
342
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
343
                                 coef_probs);
344
#endif
345
      seg_eob = 16;
346
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
347
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
348
      break;
349
    }
350
351
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
352
      const int sz = 1 + b_width_log2(sb_type);
353
      const int x = block & ((1 << sz) - 1), y = block - x;
354
355
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
356
357
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
358
      scan = get_scan_8x8(tx_type);
359
#if !CONFIG_BALANCED_COEFTREE
360
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
361
                                 coef_probs);
362
#endif
363
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
364
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
365
      break;
366
367
368
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
369
      const int sz = 2 + b_width_log2(sb_type);
370
      const int x = block & ((1 << sz) - 1), y = block - x;
371
372
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
373
      scan = get_scan_16x16(tx_type);
374
#if !CONFIG_BALANCED_COEFTREE
375
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
376
                                 coef_probs);
377
#endif
378
      seg_eob = 256;
379
380
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
381
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
382
      break;
383
    }
384
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
385
      scan = vp9_default_scan_32x32;
386
#if !CONFIG_BALANCED_COEFTREE
387
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
388
                                 coef_probs);
389
#endif
390
      seg_eob = 1024;
391
392
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
393
      band_translate = vp9_coefband_trans_8x8plus;
394
      break;
Daniel Kang's avatar
Daniel Kang committed
395
    default:
396
      abort();
Daniel Kang's avatar
Daniel Kang committed
397
398
      break;
  }
John Koleszar's avatar
John Koleszar committed
399
  assert(eob <= seg_eob);
400

401
  pt = combine_entropy_contexts(above_ec, left_ec);
402
403
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
404

405
406
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
407

408
409
410
411
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

412
  {
413
    for (c = 0; c < eob; c++) {
414
      int v = qcoeff_ptr[scan[c]];
415
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
416
      int band = get_coef_band(band_translate, c);
417
418
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
419

420
421
422
423
424
425
#if CONFIG_BALANCED_COEFTREE
      if (!c || token_cache[scan[c - 1]])  // do not skip eob
        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
      else
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#else
426
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
427
      if (!c || token_cache[scan[c - 1]])
428
        cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
429
#endif
430
      token_cache[scan[c]] = vp9_pt_energy_class[t];
431
    }
432
433
434
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
435
436
437
438
439
#if CONFIG_BALANCED_COEFTREE
      cost += mb->token_costs_noskip[tx_size][type][ref]
          [get_coef_band(band_translate, c)]
          [pt][DCT_EOB_TOKEN];
#else
440
      cost += mb->token_costs[tx_size][type][ref]
Paul Wilkins's avatar
Paul Wilkins committed
441
          [get_coef_band(band_translate, c)]
442
          [pt][DCT_EOB_TOKEN];
443
#endif
444
    }
445
446
  }

447
448
449
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
450
  }
451

452
453
454
  return cost;
}

455
// Picks the transform size for the current block from per-size rate,
// distortion and skip measurements, and reports the RD cost per transform
// mode.
//
//   r[n][0]     - in: rate for transform size n; r[n][1] is overwritten with
//                 that rate plus the cost of signalling size n via
//                 cm->prob_tx[]
//   d, s        - in: distortion and skip flag per transform size
//   rate, distortion, skip - out: values for the chosen size
//   txfm_cache  - out: RD cost for each transform mode
//   max_txfm_size - largest transform size with valid entries in r/d/s
//
// The chosen size is stored in the block's mbmi->txfm_size.
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  TX_SIZE select_size, chosen;
  int no_skip_cost, skip_cost;
  int n, m;

  // Rate including the transform-size flags: a "one" for every size passed
  // over and, unless size n is the maximum, a terminating "zero".
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    const int last_flag = n - (n == max_txfm_size);

    r[n][1] = r[n][0];
    for (m = 0; m <= last_flag; m++) {
      if (m != n)
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  no_skip_cost = vp9_cost_bit(skip_prob, 0);
  skip_cost = vp9_cost_bit(skip_prob, 1);

  // rd[n][0]: cost without the size flags; rd[n][1]: cost with them.
  // A skipped block is charged the skip flag only, in both columns.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (!s[n]) {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + no_skip_cost, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + no_skip_cost, d[n]);
    } else {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, skip_cost, d[n]);
    }
  }

  // Size preferred under TX_MODE_SELECT: a larger size wins only when it is
  // strictly cheaper than every smaller one.
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    select_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    select_size = TX_16X16;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    select_size = TX_8X8;
  } else {
    select_size = TX_4X4;
  }

  // Fixed modes use the largest allowed size that max_txfm_size permits,
  // but never fall below 8x8; any other mode uses 4x4.
  switch (cm->txfm_mode) {
    case TX_MODE_SELECT:
      chosen = select_size;
      break;
    case ALLOW_32X32:
      if (max_txfm_size == TX_32X32)
        chosen = TX_32X32;
      else if (max_txfm_size >= TX_16X16)
        chosen = TX_16X16;
      else
        chosen = TX_8X8;
      break;
    case ALLOW_16X16:
      chosen = (max_txfm_size >= TX_16X16) ? TX_16X16 : TX_8X8;
      break;
    case ALLOW_8X8:
      chosen = TX_8X8;
      break;
    default:
      chosen = TX_4X4;
      break;
  }
  mbmi->txfm_size = chosen;

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  txfm_cache[TX_MODE_SELECT] = rd[select_size][1];
}

534
535
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
536
537
538
539
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
540
    int this_diff = coeff[i] - dqcoeff[i];
541
    error += (unsigned)this_diff * this_diff;
542
  }
543
  error >>= shift;
544

Frank Galligan's avatar
Frank Galligan committed
545
  return error > INT_MAX ? INT_MAX : (int)error;
546
547
}

548
549
// Returns block_error() over the plane-0 coefficients of a block of size
// `bsize`, covering 16 << (log2 width + log2 height) coefficients.
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int log2_blocks = b_width_log2(bsize) + b_height_log2(bsize);
  const int num_coeffs = 16 << log2_blocks;

  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     num_coeffs, shift);
}
553

554
555
556
557
// Returns the combined block_error() of planes 1..MAX_MB_PLANE-1 for a block
// of size `bsize`. Each plane's coefficient count is reduced by its
// horizontal plus vertical subsampling; the per-plane errors are summed
// unshifted, then the total is shifted right by `shift` and saturated to
// INT_MAX.
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int log2_blocks = b_width_log2(bsize) + b_height_log2(bsize);
  int64_t total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    const int ss = x->e_mbd.plane[plane].subsampling_x +
                   x->e_mbd.plane[plane].subsampling_y;
    const int num_coeffs = 16 << (log2_blocks - ss);

    total += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                         num_coeffs, 0);
    ++plane;
  }
  total >>= shift;

  if (total > INT_MAX)
    return INT_MAX;
  return (int)total;
}

569
570
// Returns the summed cost_coeffs() of every transform block of `plane`
// within a block of size `bsize`, using transform size `tx_size`.
//
// The plane's above/left entropy contexts are copied into local scratch
// arrays first, so the contexts stored in `xd` are not modified here.
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl;
  const int bh = 1 << bhl;
  const int num_blocks = bw * bh;
  const int step = 1 << (tx_size * 2);  // blocks covered by one transform
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int total = 0;
  int block = 0;

  vpx_memcpy(t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(t_left, xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  while (block < num_blocks) {
    int x_idx, y_idx;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);
    total += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
                         t_above + x_idx, t_left + y_idx,
                         tx_size, num_blocks);
    block += step;
  }

  return total;
}

598
599
600
// Returns the sum of rdcost_plane() over planes 1..MAX_MB_PLANE-1.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total;
}

608
609
610
// Measures plane 0 of the current block at transform size `tx_size`.
//
// Stores `tx_size` in the block's mode info, runs vp9_xform_quant_sby(),
// then writes the block error to *distortion, the coefficient cost to *rate
// and the vp9_sby_is_skippable() result to *skippable. The block error is
// shifted right by 2 for every size except TX_32X32.
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int dist_shift = (tx_size == TX_32X32) ? 0 : 2;

  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, dist_shift);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

620
621
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
622
                            int *skip, BLOCK_SIZE_TYPE bs,
623
                            int64_t txfm_cache[NB_TXFM_MODES]) {
624
625
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Jim Bankoski's avatar
Jim Bankoski committed
626
627
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
628

629
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
630

Jim Bankoski's avatar
Jim Bankoski committed
631
632
633
634
635
636
637
638
639
640
641
642
643
644
  if (cpi->speed > 4) {
    if (bs >= BLOCK_SIZE_SB32X32) {
      mbmi->txfm_size = TX_32X32;
    } else if (bs >= BLOCK_SIZE_MB16X16) {
      mbmi->txfm_size = TX_16X16;
    } else if (bs >= BLOCK_SIZE_SB8X8) {
      mbmi->txfm_size = TX_8X8;
    } else {
      mbmi->txfm_size = TX_4X4;
    }
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
                             mbmi->txfm_size);
    return;
  }
645
  if (bs >= BLOCK_SIZE_SB32X32)
646
647
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
648
  if (bs >= BLOCK_SIZE_MB16X16)
649
650
651
652
653
654
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
655
656

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
657
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
658
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
659
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
660

661
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
Yaowu Xu's avatar
Yaowu Xu committed
662
                                     MB_PREDICTION_MODE *best_mode,
663
664
665
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
666
667
668
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
669
  MACROBLOCKD *xd = &x->e_mbd;
670
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
671
672
  int rate = 0;
  int distortion;
673
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
674
  const int src_stride = x->plane[0].src.stride;
675
676
677
678
679
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Deb Mukherjee's avatar
Deb Mukherjee committed
680
681
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
682
683
684
685
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
John Koleszar's avatar
John Koleszar committed
686

Jingning Han's avatar
Jingning Han committed
687
  assert(ib < 4);
688

689
690
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
691
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
692
693

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
694
    int64_t this_rd;
695
    int ratey = 0;
696

697
    rate = bmode_costs[mode];
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
715
716
        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
                             dst, xd->plane[0].dst.stride);
717
718
719
720
721
722
723
724
725
726
727
728
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }
John Koleszar's avatar
John Koleszar committed
729

730
731
732
733
        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;
John Koleszar's avatar
John Koleszar committed
734

735
736
737
738
739
740
741
742
        if (best_tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, best_tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }
Jingning Han's avatar
Jingning Han committed
743

744
745
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
746

747
748
749
750
751
752
753
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
754
755
756
757
758
759
760
761
762
763
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
John Koleszar's avatar
John Koleszar committed
764
    }
John Koleszar's avatar
John Koleszar committed
765
  }
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
                           dst, xd->plane[0].dst.stride);
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
Scott LaVarnway's avatar
Scott LaVarnway committed
785
  }
John Koleszar's avatar
John Koleszar committed
786

John Koleszar's avatar
John Koleszar committed
787
  return best_rd;
John Koleszar's avatar
John Koleszar committed
788
789
}

790
791
/*
 * Chooses an intra mode for every sub-8x8 partition of the current 8x8
 * block by calling rd_pick_intra4x4block() once per partition, and sums the
 * resulting rate and distortion.
 *
 * Rate        out: total rate (mode + coefficients) over all partitions.
 * rate_y      out: total coefficient rate over all partitions.
 * Distortion  out: total distortion over all partitions.
 * best_rd     RD cost to beat.
 *
 * Returns the RD cost of the summed rate/distortion, or INT64_MAX as soon as
 * the accumulated per-partition RD cost reaches best_rd; in that case the
 * out parameters are not written and the remaining partitions are not
 * searched.
 */
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
                                         int *Distortion, int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
  int cost = 0;
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;
  MODE_INFO *const mic = xd->mode_info_context;

  // Work on local copies of the entropy contexts; the per-partition search
  // updates them in place.
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  bmode_costs = mb->mbmode_cost;

  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      const int mis = xd->mode_info_stride;
      MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
      int UNINITIALIZED_IS_SAFE(d);
      i = idy * 2 + idx;

      if (xd->frame_type == KEY_FRAME) {
        // On key frames the mode cost table is selected by the modes of the
        // above and left blocks.
        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, i) : DC_PRED;

        bmode_costs  = mb->y_mode_costs[A][L];
      }

      total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                        t_above + idx, t_left + idy,
                                        &r, &ry, &d, bsize);
      cost += r;
      distortion += d;
      tot_rate_y += ry;

      // Record the chosen mode for this block and replicate it down the
      // column (bh) and along the row (bw) of the partition.
      mic->bmi[i].as_mode.first = best_mode;
      for (j = 1; j < bh; ++j)
        mic->bmi[i + j * 2].as_mode.first = best_mode;
      for (j = 1; j < bw; ++j)
        mic->bmi[i + j].as_mode.first = best_mode;

      // Give up immediately once the budget is exhausted.  A plain `break`
      // here would leave only the inner loop and keep searching further
      // partitions whose results are discarded anyway.
      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *Rate = cost;
  *rate_y = tot_rate_y;
  *Distortion = distortion;
  xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;

  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
856

Ronald S. Bultje's avatar