/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
50
51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
57
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
58

59
60
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
61

62
63
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65
66
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
67

68
69
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
70

71
72
73
74
75
76
77
78
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
79

80
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
84
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
85

86
87
88
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
89

Ronald S. Bultje's avatar
Ronald S. Bultje committed
90
  {I4X4_PRED, INTRA_FRAME,  NONE},
91

John Koleszar's avatar
John Koleszar committed
92
  /* compound prediction modes */
Ronald S. Bultje's avatar
Ronald S. Bultje committed
93
94
95
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
96

John Koleszar's avatar
John Koleszar committed
97
98
99
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
100

Ronald S. Bultje's avatar
Ronald S. Bultje committed
101
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
102
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
103

Ronald S. Bultje's avatar
Ronald S. Bultje committed
104
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
105
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
106
107
};

108
109
110
111
112
113
114
115
116
117
118
// Per-block-size correction for the rd loop break-out thresholds.
// The baseline thresholds are assumed to be based on 8x8 blocks; each entry
// here is the scale factor for one block size, stored << 2
// (so 2 = x0.5, 4 = x1, 32 = x8 etc). vp9_initialize_rd_consts() divides the
// product by 4 to undo that scaling.
static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};

// Values for cpi->rd_thresh_freq_fact[][]; vp9_initialize_rd_consts() starts
// from MAX when sf.adpative_rd_thresh is set and from BASE otherwise.
#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

119
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
120
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
121
  int i, j, k, l;
122
123
124
125
126
127
128
129
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
130
            vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
131
                            vp9_coef_tree);
132
#if CONFIG_BALANCED_COEFTREE
133
134
135
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            probs[1] = 1;
136
            vp9_cost_tokens((int *)c[t][i][j][1][k][l], probs, vp9_coef_tree);
137
#else
138
            vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
139
                                 vp9_coef_tree);
140
141
            assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
                   c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
142
#endif
143
          }
144
145
}

146
147
148
149
// Lookup indexed by cpi->twopass.next_iiratio (clamped to 31 by the caller);
// the selected entry is applied to RDMULT as RDMULT += (RDMULT * entry) >> 4.
static int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
150

151
// 3* dc_qlookup[Q]*dc_qlookup[Q];
152

153
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
154
155
156
// SAD-per-bit lookup tables indexed by qindex. Filled once by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

// Populates both SAD-per-bit tables for every quantizer index in
// [0, QINDEX_RANGE).
//
// Declared with an explicit (void) parameter list: in C an empty "()" is a
// non-prototype declarator and lets callers pass arbitrary arguments
// unchecked.
void vp9_init_me_luts(void) {
  int qindex;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (qindex = 0; qindex < QINDEX_RANGE; qindex++) {
    // Convert once per index; both tables are linear in the same q value.
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[qindex] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
169

170
// Returns the base rate-distortion multiplier for qindex:
// 11/4 of the squared DC quantizer value (vp9_dc_quant with delta 0).
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
175
176
177
// Copies the SAD-per-bit values for qindex from the lookup tables into
// cpi->mb. qindex is used directly as the table index, so it must be a valid
// index into the QINDEX_RANGE-sized tables.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  const int sad16 = sad_per_bit16lut[qindex];
  const int sad4 = sad_per_bit4lut[qindex];

  cpi->mb.sadperbit16 = sad16;
  cpi->mb.sadperbit4 = sad4;
}

180

Dmitry Kovalev's avatar
Dmitry Kovalev committed
181
// Initializes the rate-distortion constants held in cpi for the given
// quantizer index:
//  - RDMULT / RDDIV and mb.errorperbit,
//  - speed features (via vp9_set_speed_features),
//  - per block size / per mode rd thresholds, their baselines and the
//    threshold frequency factors,
//  - token, partition, mode and (on non-key frames) motion vector cost
//    tables.
// qindex is clamped to [0, MAXQ] before use.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i, bsize;
  int thresh_div;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // Ratios above the table range all use the last table entry.
    const int ii_idx =
        cpi->twopass.next_iiratio > 31 ? 31 : cpi->twopass.next_iiratio;

    cpi->RDMULT += (cpi->RDMULT * rd_iifactor[ii_idx]) >> 4;
  }

  // errorperbit is never allowed to be zero.
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  if (cpi->mb.errorperbit == 0)
    cpi->mb.errorperbit = 1;

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  // Large multipliers are rescaled by 100, and the thresholds are divided by
  // the same extra factor. The constant 4 in the divisor relates to the
  // scaling of rd_thresh_block_size_factor[].
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
    thresh_div = 4 * 100;
  } else {
    cpi->RDDIV = 100;
    thresh_div = 4;
  }

  for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
    for (i = 0; i < MAX_MODES; ++i) {
      // Threshold here seem unecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[]
      const int thresh_max =
          INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

      if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
        cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q *
            rd_thresh_block_size_factor[bsize] / thresh_div;
      } else {
        // The product would not fit in an int: saturate.
        cpi->rd_threshes[bsize][i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];

      cpi->rd_thresh_freq_fact[bsize][i] =
          cpi->sf.adpative_rd_thresh ? MAX_RD_THRESH_FREQ_FACT
                                     : BASE_RD_THRESH_FREQ_FACT;
    }
  }

  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) {
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);
  }

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
282
// Computes the squared error between original and dequantized coefficients.
//
//   coeff, dqcoeff: arrays of at least block_size coefficients.
//   block_size:     number of coefficients to process.
//   ssz:            out; receives the sum of squares of coeff[].
//
// Returns the sum over i of (coeff[i] - dqcoeff[i])^2.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  // The index has the same type as the bound so the counter cannot overflow
  // when block_size exceeds INT_MAX.
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    // Widen before multiplying: a difference of two int16_t values can reach
    // +/-65535, whose square does not fit in a signed 32-bit int.
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    const int64_t this_coeff = coeff[i];

    error += this_diff * this_diff;
    sqcoeff += this_coeff * this_coeff;
  }

  *ssz = sqcoeff;
  return error;
}

297
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
298
                              int plane, int block, PLANE_TYPE type,
299
300
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
301
302
                              TX_SIZE tx_size,
                              int y_blocks) {
303
  MACROBLOCKD *const xd = &mb->e_mbd;
304
305
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
306
  int c = 0;
307
  int cost = 0, pad;
308
  const int16_t *scan, *nb;
309
  const int eob = xd->plane[plane].eobs[block];
310
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
311
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
312
313
  unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
314
  ENTROPY_CONTEXT above_ec, left_ec;
315
  TX_TYPE tx_type = DCT_DCT;
316
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
317
318
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
319
  const uint8_t * band_translate;
320
321

  // Check for consistency of tx_size with mode info
322
  assert((!type && !plane) || (type && plane));
323
324
325
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
326
    TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
327
328
329
    assert(tx_size == tx_size_uv);
  }

330
  switch (tx_size) {
331
    case TX_4X4: {
332
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
333
          get_tx_type_4x4(xd, block) : DCT_DCT;
334
335
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
336
      seg_eob = 16;
337
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
338
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
339
      break;
340
    }
341
    case TX_8X8: {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
342
343
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
                                  get_tx_type_8x8(xd) : DCT_DCT;
344
345
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
346
      scan = get_scan_8x8(tx_type);
347
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
348
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
349
      break;
350
351
    }
    case TX_16X16: {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
352
353
      const TX_TYPE tx_type = type == PLANE_TYPE_Y_WITH_DC ?
                                  get_tx_type_16x16(xd) : DCT_DCT;
354
      scan = get_scan_16x16(tx_type);
355
      seg_eob = 256;
356
357
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
358
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
359
      break;
360
    }
361
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
362
      scan = vp9_default_scan_32x32;
363
      seg_eob = 1024;
364
365
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
366
      band_translate = vp9_coefband_trans_8x8plus;
367
      break;
Daniel Kang's avatar
Daniel Kang committed
368
    default:
Dmitry Kovalev's avatar
Dmitry Kovalev committed
369
      assert(0);
Daniel Kang's avatar
Daniel Kang committed
370
371
      break;
  }
John Koleszar's avatar
John Koleszar committed
372
  assert(eob <= seg_eob);
373

374
  pt = combine_entropy_contexts(above_ec, left_ec);
375
376
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
377

378
379
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
380

381
382
383
384
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

385
386
387
388
  if (eob == 0) {
    // single eob token
    cost += token_costs[0][0][pt][DCT_EOB_TOKEN];
  } else {
389
    int v, prev_t;
390
391
392

    // dc token
    v = qcoeff_ptr[0];
393
394
395
    prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost += token_costs[0][0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
396
397
398
399

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
400
401
      const int band = get_coef_band(band_translate, c);
      int t;
402
403
404

      v = qcoeff_ptr[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
405
      pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
406
      cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
407
      token_cache[rc] = vp9_pt_energy_class[t];
408
      prev_t = t;
409
    }
410
411

    // eob token
412
    if (c < seg_eob) {
413
      pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
414
415
      cost += token_costs[0][get_coef_band(band_translate, c)][pt]
                         [DCT_EOB_TOKEN];
416
    }
417
418
  }

419
420
421
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
422
  }
423

424
425
426
  return cost;
}

427
// Picks the transform size for the current block from per-size
// rate / distortion / skip results and reports the chosen size's values.
//
//   r[n][0]:  in, rate for transform size n without the size-signalling
//             cost; r[n][1] is filled in here as r[n][0] plus that cost.
//   d[n], s[n]: in, distortion and skip flag for transform size n.
//   rate, distortion, skip: out, values of the selected transform size.
//   txfm_cache: out, rd cost per transform mode.
//   max_txfm_size: largest transform size that was evaluated; entries above
//             it in r / d / s are not read.
// The selected size is written to mbmi->txfm_size.
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int64_t *d, int64_t *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE);
  int64_t rd[TX_SIZE_MAX_SB][2];
  TX_SIZE select_size;  // size TX_MODE_SELECT would pick
  int cost_noskip, cost_skip;
  int n, m;

  // Rate including the cost of signalling the transform size: one "one" bit
  // per smaller size and a terminating "zero" bit, which is omitted for the
  // largest allowed size.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    const int last_node = n - (n == max_txfm_size);

    r[n][1] = r[n][0];
    for (m = 0; m <= last_node; m++) {
      r[n][1] += (m == n) ? vp9_cost_zero(tx_probs[m])
                          : vp9_cost_one(tx_probs[m]);
    }
  }

  assert(skip_prob > 0);
  cost_noskip = vp9_cost_bit(skip_prob, 0);
  cost_skip = vp9_cost_bit(skip_prob, 1);

  // rd[n][0] / rd[n][1]: rd cost without / with the size-signalling rate.
  // Skipped blocks only pay for the skip flag.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, cost_skip, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + cost_noskip, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + cost_noskip, d[n]);
    }
  }

  // A larger size is chosen only when it is strictly cheaper than every
  // smaller size.
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    select_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    select_size = TX_16X16;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    select_size = TX_8X8;
  } else {
    select_size = TX_4X4;
  }

  // Fixed modes use the largest size they allow, limited by max_txfm_size but
  // never going below 8x8; any other mode falls back to 4x4.
  switch (cm->txfm_mode) {
    case TX_MODE_SELECT:
      mbmi->txfm_size = select_size;
      break;
    case ALLOW_32X32:
      if (max_txfm_size == TX_32X32)
        mbmi->txfm_size = TX_32X32;
      else if (max_txfm_size >= TX_16X16)
        mbmi->txfm_size = TX_16X16;
      else
        mbmi->txfm_size = TX_8X8;
      break;
    case ALLOW_16X16:
      mbmi->txfm_size = (max_txfm_size >= TX_16X16) ? TX_16X16 : TX_8X8;
      break;
    case ALLOW_8X8:
      mbmi->txfm_size = TX_8X8;
      break;
    default:
      mbmi->txfm_size = TX_4X4;
      break;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  txfm_cache[TX_MODE_SELECT] = rd[select_size][1];
}

Ronald S. Bultje's avatar
Ronald S. Bultje committed
508
// Returns the luma (plane 0) coefficient error for a block of size bsize,
// right-shifted by shift. *sse receives the sum of squared source
// coefficients reported by vp9_block_error(), shifted the same way.
static int64_t block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                               int shift, int64_t *sse) {
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
  const int width = plane_block_width(bsize, pd);
  const int height = plane_block_height(bsize, pd);
  const int64_t error = vp9_block_error(x->plane[0].coeff,
                                        x->e_mbd.plane[0].dqcoeff,
                                        width * height, sse);

  *sse >>= shift;
  return error >> shift;
}
518

Ronald S. Bultje's avatar
Ronald S. Bultje committed
519
// Returns the coefficient error summed over all planes from 1 up to
// MAX_MB_PLANE - 1 for a block of size bsize, right-shifted by shift.
// *sse receives the matching sum of the per-plane values reported by
// vp9_block_error(), shifted the same way.
static int64_t block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize,
                                int shift, int64_t *sse) {
  int64_t total_error = 0;
  int64_t total_sse = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
    const int width = plane_block_width(bsize, pd);
    const int height = plane_block_height(bsize, pd);
    int64_t plane_sse;

    total_error += vp9_block_error(x->plane[plane].coeff,
                                   x->e_mbd.plane[plane].dqcoeff,
                                   width * height, &plane_sse);
    total_sse += plane_sse;
  }

  *sse = total_sse >> shift;
  return total_error >> shift;
}

537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
// State shared between rdcost_plane() and its per-transform-block callback
// rdcost_block(). rdcost_plane() initializes this struct positionally, so
// the field order must not change.
struct rdcost_block_args {
  VP9_COMMON *cm;               // forwarded to cost_coeffs()
  MACROBLOCK *x;                // macroblock whose coefficients are costed
  ENTROPY_CONTEXT t_above[16];  // working copy of the plane's above context
  ENTROPY_CONTEXT t_left[16];   // working copy of the plane's left context
  TX_SIZE tx_size;              // transform size used for every block
  int bw;                       // plane block width, 1 << (log2 width - ss_x)
  int bh;                       // plane block height, 1 << (log2 height - ss_y)
  int cost;                     // running sum of cost_coeffs() results
};

// Callback for foreach_transformed_block_in_plane(): adds the coefficient
// cost of one transform block to args->cost. arg must point to a
// struct rdcost_block_args; ss_txfrm_size is not used.
static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                         int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const args = arg;
  MACROBLOCKD *const xd = &args->x->e_mbd;
  ENTROPY_CONTEXT *above, *left;
  int x_idx, y_idx;

  // Locate the block inside the plane to find its entropy contexts.
  txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2,
                           &x_idx, &y_idx);
  above = args->t_above + x_idx;
  left = args->t_left + y_idx;

  args->cost += cost_coeffs(args->cm, args->x, plane, block,
                            xd->plane[plane].plane_type, above, left,
                            args->tx_size, args->bw * args->bh);
}

// Returns the total coefficient cost of one plane of a block of size bsize
// coded with transform size tx_size. The plane's above / left entropy
// contexts are copied into scratch arrays first, so the contexts stored in
// xd are not modified by the costing.
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD * const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  // Plane dimensions after accounting for chroma subsampling.
  const int bwl = b_width_log2(bsize) - pd->subsampling_x;
  const int bhl = b_height_log2(bsize) - pd->subsampling_y;
  const int bw = 1 << bwl;
  const int bh = 1 << bhl;
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 };

  vpx_memcpy(args.t_above, pd->above_context, sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(args.t_left, pd->left_context, sizeof(ENTROPY_CONTEXT) * bh);

  foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args);

  return args.cost;
}

581
582
583
// Returns the summed coefficient cost of every plane from 1 up to
// MAX_MB_PLANE - 1 for a block of size bsize and transform size tx_size.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total;
}

591
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
592
                                     int *rate, int64_t *distortion,
593
                                     int *skippable, int64_t *sse,
594
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
595
  MACROBLOCKD *const xd = &x->e_mbd;
596
  xd->mode_info_context->mbmi.txfm_size = tx_size;
597

Ronald S. Bultje's avatar
Ronald S. Bultje committed
598
  if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
599
600
601
    vp9_encode_intra_block_y(cm, x, bsize);
  else
    vp9_xform_quant_sby(cm, x, bsize);
602

603
  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2, sse);
604
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
605
  *skippable  = vp9_sby_is_skippable(xd, bsize);
606
607
}

608
static void super_block_yrd(VP9_COMP *cpi,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
609
                            MACROBLOCK *x, int *rate, int64_t *distortion,
610
                            int *skip, int64_t *psse, BLOCK_SIZE_TYPE bs,
611
                            int64_t txfm_cache[NB_TXFM_MODES]) {
612
  VP9_COMMON *const cm = &cpi->common;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
613
  int r[TX_SIZE_MAX_SB][2], s[TX_SIZE_MAX_SB];
614
  int64_t d[TX_SIZE_MAX_SB], sse[TX_SIZE_MAX_SB];
Jim Bankoski's avatar
Jim Bankoski committed
615
616
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
617

618
  assert(bs == mbmi->sb_type);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
619
  if (mbmi->ref_frame[0] > INTRA_FRAME)
620
    vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
621

622
  if (cpi->sf.use_largest_txform) {
Jim Bankoski's avatar
Jim Bankoski committed
623
624
625
626
627
628
629
630
631
    if (bs >= BLOCK_SIZE_SB32X32) {
      mbmi->txfm_size = TX_32X32;
    } else if (bs >= BLOCK_SIZE_MB16X16) {
      mbmi->txfm_size = TX_16X16;
    } else if (bs >= BLOCK_SIZE_SB8X8) {
      mbmi->txfm_size = TX_8X8;
    } else {
      mbmi->txfm_size = TX_4X4;
    }
632
    vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
633
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, &sse[0], bs,
Jim Bankoski's avatar
Jim Bankoski committed
634
                             mbmi->txfm_size);
635
636
    if (psse)
      *psse = sse[0];
Jim Bankoski's avatar
Jim Bankoski committed
637
638
    return;
  }
639
  if (bs >= BLOCK_SIZE_SB32X32)
640
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
641
                             &sse[TX_32X32], bs, TX_32X32);
642
  if (bs >= BLOCK_SIZE_MB16X16)
643
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
644
645
646
647
648
                             &sse[TX_16X16], bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
                           &sse[TX_8X8], bs, TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
                           &sse[TX_4X4], bs, TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
649

650
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
651
                           skip, txfm_cache,
652
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
653
                           - (bs < BLOCK_SIZE_MB16X16));
654
655
  if (psse)
    *psse = sse[mbmi->txfm_size];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
656
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
657

658
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
Yaowu Xu's avatar
Yaowu Xu committed
659
                                     MB_PREDICTION_MODE *best_mode,
660
661
662
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
663
                                     int64_t *bestdistortion,
664
665
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
666
  MACROBLOCKD *xd = &x->e_mbd;
667
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
668
  int rate = 0;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
669
  int64_t distortion;
670
  VP9_COMMON *const cm = &cpi->common;
671
672
673
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
674
675
676
677
678
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Deb Mukherjee's avatar
Deb Mukherjee committed
679
680
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
681
682
683
684
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
John Koleszar's avatar
John Koleszar committed
685

Jingning Han's avatar
Jingning Han committed
686
  assert(ib < 4);
687

688
689
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
690
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
691
692

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
693
    int64_t this_rd;
694
    int ratey = 0;
695

696
    rate = bmode_costs[mode];
697
698
699
700
701
702
703
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
704
705
        int64_t ssz;

706
707
708
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
709
                                        p->src.buf, src_stride);
710
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
711
                                             p->src_diff);
712
713
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
714
715
                                        pd->dst.buf,
                                        pd->dst.stride);
Jingning Han's avatar
Jingning Han committed
716
        vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8),
717
718
719
                                TX_4X4, mode,
                                dst, pd->dst.stride,
                                dst, pd->dst.stride);
720
721
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
722
                           dst, pd->dst.stride);
723
724
725
726
727
728
729
730
731

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }
John Koleszar's avatar
John Koleszar committed
732

733
734
        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
735
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff,
736
737
                                                          block, 16),
                                      16, &ssz) >> 2;
John Koleszar's avatar
John Koleszar committed
738

739
        if (best_tx_type != DCT_DCT)
740
741
          vp9_short_iht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16),
                               dst, pd->dst.stride, best_tx_type);
742
        else
743
744
          xd->inv_txm4x4_add(BLOCK_OFFSET(pd->dqcoeff, block, 16),
                             dst, pd->dst.stride);
745
746
      }
    }
Jingning Han's avatar
Jingning Han committed
747

748
749
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
750

751
752
753
754
755
756
757
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
758
759
760
761
762
763
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
764
                     BLOCK_OFFSET(pd->dqcoeff, block, 16),
765
766
767
                     sizeof(best_dqcoeff[0]));
        }
      }
John Koleszar's avatar
John Koleszar committed
768
    }
John Koleszar's avatar
John Koleszar committed
769
  }
770
771
772
773
774
775

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
776
777
                                      pd->dst.buf,
                                      pd->dst.stride);
778

Jingning Han's avatar
Jingning Han committed
779
      vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), TX_4X4,
780
781
                              *best_mode, dst, pd->dst.stride,
                              dst, pd->dst.stride);
782
783
784
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
785
                            pd->dst.stride, best_tx_type);
786
787
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
788
                           pd->dst.stride);
789
    }
Scott LaVarnway's avatar
Scott LaVarnway committed
790
  }
John Koleszar's avatar
John Koleszar committed
791

John Koleszar's avatar
John Koleszar committed
792
  return best_rd;
John Koleszar's avatar
John Koleszar committed
793
794
}

795
796
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
797
                                         int64_t *Distortion, int64_t best_rd) {
798
  int i, j;
John Koleszar's avatar
John Koleszar committed
799
  MACROBLOCKD *const xd = &mb->e_mbd;
800
801
802
803
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
804
  int cost = 0;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
805
  int64_t distortion = 0;
John Koleszar's avatar
John Koleszar committed
806
807
  int tot_rate_y = 0;
  int64_t total_rd = 0;
808
  ENTROPY_CONTEXT t_above[4], t_left[4];
John Koleszar's avatar
John Koleszar committed
809
  int *bmode_costs;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
810
  MODE_INFO *const mic = xd->mode_info_context;
John Koleszar's avatar
John Koleszar committed
811

812
813
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));