vp9_rdopt.c 171 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
12
13
14
15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16
17
#include "vp9/common/vp9_pragmas.h"

18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
28
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconintra4x4.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
29
30
31
32
33
34
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
35
#include "vpx_mem/vpx_mem.h"
36
37
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
38

39
40
41
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
42
#include "vp9_rtcd.h"
43
#include "vp9/common/vp9_mvref_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45
46
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

47
48
#define INVALID_MV 0x80008000

49
50
51
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/* Table of 17 automatic-speed thresholds.  The consumer of this table is not
 * in this part of the file; the values are reproduced unchanged. */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, 150, 125, 120,
  115,  115, 115, 115, 115, 115, 115, 115, 115,
  105
};

72
#if CONFIG_PRED_FILTER
73
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
  {ZEROMV,    LAST_FRAME,   NONE,  0},
  {ZEROMV,    LAST_FRAME,   NONE,  1},
  {DC_PRED,   INTRA_FRAME,  NONE,  0},

  {NEARESTMV, LAST_FRAME,   NONE,  0},
  {NEARESTMV, LAST_FRAME,   NONE,  1},
  {NEARMV,    LAST_FRAME,   NONE,  0},
  {NEARMV,    LAST_FRAME,   NONE,  1},

  {ZEROMV,    GOLDEN_FRAME, NONE,  0},
  {ZEROMV,    GOLDEN_FRAME, NONE,  1},
  {NEARESTMV, GOLDEN_FRAME, NONE,  0},
  {NEARESTMV, GOLDEN_FRAME, NONE,  1},

  {ZEROMV,    ALTREF_FRAME, NONE,  0},
  {ZEROMV,    ALTREF_FRAME, NONE,  1},
  {NEARESTMV, ALTREF_FRAME, NONE,  0},
  {NEARESTMV, ALTREF_FRAME, NONE,  1},

  {NEARMV,    GOLDEN_FRAME, NONE,  0},
  {NEARMV,    GOLDEN_FRAME, NONE,  1},
  {NEARMV,    ALTREF_FRAME, NONE,  0},
  {NEARMV,    ALTREF_FRAME, NONE,  1},

  {V_PRED,    INTRA_FRAME,  NONE,  0},
  {H_PRED,    INTRA_FRAME,  NONE,  0},
  {D45_PRED,  INTRA_FRAME,  NONE,  0},
  {D135_PRED, INTRA_FRAME,  NONE,  0},
  {D117_PRED, INTRA_FRAME,  NONE,  0},
  {D153_PRED, INTRA_FRAME,  NONE,  0},
  {D27_PRED,  INTRA_FRAME,  NONE,  0},
  {D63_PRED,  INTRA_FRAME,  NONE,  0},

  {TM_PRED,   INTRA_FRAME,  NONE,  0},

  {NEWMV,     LAST_FRAME,   NONE,  0},
  {NEWMV,     LAST_FRAME,   NONE,  1},
  {NEWMV,     GOLDEN_FRAME, NONE,  0},
  {NEWMV,     GOLDEN_FRAME, NONE,  1},
  {NEWMV,     ALTREF_FRAME, NONE,  0},
  {NEWMV,     ALTREF_FRAME, NONE,  1},

  {SPLITMV,   LAST_FRAME,   NONE,  0},
  {SPLITMV,   GOLDEN_FRAME, NONE,  0},
  {SPLITMV,   ALTREF_FRAME, NONE,  0},

  {B_PRED,    INTRA_FRAME,  NONE,  0},
  {I8X8_PRED, INTRA_FRAME,  NONE,  0},
John Koleszar's avatar
John Koleszar committed
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141

  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME, 0},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME, 0},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME, 0},

  {ZEROMV,    ALTREF_FRAME, LAST_FRAME,   0},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME,   0},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME,   0},

  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME, 0},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME, 0},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME, 0},

  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME, 0},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME,   0},
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME, 0},

  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME, 0},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME,   0},
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME, 0},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME, 0},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME, 0},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME, 0},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME, 0},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME, 0},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME, 0},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME, 0},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME, 0},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME, 0},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME, 0},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME, 0},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME, 0},
#endif
John Koleszar's avatar
John Koleszar committed
161
};
162
#else
163
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
164
165
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
166

167
168
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
169

170
171
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
172

173
174
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
175

176
177
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
178

179
180
181
182
183
184
185
186
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
187

188
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
189

190
191
192
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
193

194
195
196
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
197

198
199
  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},
200

John Koleszar's avatar
John Koleszar committed
201
202
203
204
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
205

John Koleszar's avatar
John Koleszar committed
206
207
208
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
209

John Koleszar's avatar
John Koleszar committed
210
211
212
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
213

John Koleszar's avatar
John Koleszar committed
214
215
216
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
217

John Koleszar's avatar
John Koleszar committed
218
219
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
239
};
240
#endif
John Koleszar's avatar
John Koleszar committed
241

242
243
244
/* Fills the token cost table `c` from the coefficient probabilities `p` for
 * the first `block_type_counts` block types, over every coefficient band and
 * every previous-token context.
 *
 * For context 0, bands at or above a per-type threshold use
 * vp9_cost_tokens_skip(); every other position uses vp9_cost_tokens().
 * The threshold is band 2 for block type 0 and band 1 for all other types. */
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
                             int block_type_counts) {
  int type, band, ctx;

  for (type = 0; type < block_type_counts; type++) {
    /* Lowest band that gets the "skip" costing when the context is 0. */
    const int first_skip_band = (type == 0) ? 2 : 1;

    for (band = 0; band < COEF_BANDS; band++) {
      for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ctx++) {
        int *const costs = (int *)(c[type][band][ctx]);

        if (ctx == 0 && band >= first_skip_band) {
          vp9_cost_tokens_skip(costs, p[type][band][ctx], vp9_coef_tree);
        } else {
          vp9_cost_tokens(costs, p[type][band][ctx], vp9_coef_tree);
        }
      }
    }
  }
}

261

262
263
264
265
/* RDMULT boost factors in sixteenths, indexed by the two-pass
 * intra/inter ratio (cpi->twopass.next_iiratio, capped at 31 by the caller).
 * Only ratios 0..4 give a non-zero boost.  The table is read-only, so it is
 * declared const. */
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
266

267
// 3* dc_qlookup[Q]*dc_qlookup[Q];
268

269
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
270
271
272
/* Per-quantizer-index lookup tables.  Both are filled by vp9_init_me_luts()
 * and copied into cpi->mb.sadperbit16 / cpi->mb.sadperbit4 by
 * vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

273
/* Populates sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer
 * index in [0, QINDEX_RANGE).
 *
 * Each entry is a linear function of vp9_convert_qindex_to_q(qindex),
 * truncated to int.  Takes no arguments; declared with (void) so that the
 * definition is a proper prototype. */
void vp9_init_me_luts(void) {
  int qindex;

  // Initialize the sad lut tables using a formulaic calculation for now.
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (qindex = 0; qindex < QINDEX_RANGE; qindex++) {
    // Convert once per index; both tables are derived from the same value.
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[qindex] = (int)((0.063 * q) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
285

286
/* Returns the base rate-distortion multiplier for `qindex`:
 * (11 * q * q) >> 6, where q = vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);

  return (11 * dc_q * dc_q) >> 6;
}

293
/* Loads the SAD-per-bit constants for quantizer index `QIndex` from the
 * lookup tables into the encoder's macroblock state.  `QIndex` is used as an
 * array index without range checking. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
  MACROBLOCK *const x = &cpi->mb;

  x->sadperbit16 = sad_per_bit16lut[QIndex];
  x->sadperbit4 = sad_per_bit4lut[QIndex];
}

298

299
/* Initializes the rate-distortion constants of `cpi` for quantizer index
 * `QIndex` (clamped to [0, MAXQ]).
 *
 * Sets cpi->RDMULT, cpi->RDDIV, cpi->mb.errorperbit, the per-mode
 * cpi->rd_threshes[] / cpi->rd_baseline_thresh[] arrays, the token cost
 * tables for each transform size, the mode costs and, on non-key frames,
 * the motion vector cost tables.  Also calls vp9_set_speed_features(). */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);

  cpi->RDMULT = compute_rd_mult(QIndex);

  // Extend rate multiplier along side quantizer zbin increases
  if (cpi->zbin_over_quant > 0) {
    double oq_factor;

    // Experimental code using the same basic equation as used for Q above
    // The units of cpi->zbin_over_quant are 1/128 of Q bin size
    oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
    cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor);
  }

  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // Clamp the ratio to the bounds of rd_iifactor[] on both sides.  The
    // upper clamp was always present; the lower clamp prevents a negative
    // ratio from reading before the start of the table.
    int iifactor_idx = 31;  // last entry of rd_iifactor[]

    if (cpi->twopass.next_iiratio < 0)
      iifactor_idx = 0;
    else if (cpi->twopass.next_iiratio < 31)
      iifactor_idx = cpi->twopass.next_iiratio;

    // Factors are expressed in sixteenths of RDMULT.
    cpi->RDMULT += (cpi->RDMULT * rd_iifactor[iifactor_idx]) >> 4;
  }

  if (cpi->RDMULT < 7)
    cpi->RDMULT = 7;

  // errorperbit is never allowed to be zero.
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
  q = q << 2;
  cpi->RDMULT = cpi->RDMULT << 4;

  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      // INT_MAX is kept as-is: it is passed through unscaled.
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in a wider type and saturate, so that a large multiplier
        // cannot overflow a signed int before the division by 100.
        const long long scaled =
            (long long)cpi->sf.thresh_mult[i] * q / 100;

        cpi->rd_threshes[i] = (scaled < INT_MAX) ? (int)scaled : INT_MAX;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // q >= 8 here, so the division is safe and the product cannot overflow.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES_4X4);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_4X4],
                   cpi->common.fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4);

  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_8X8],
                   cpi->common.fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8);

  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES_16X16);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16],
                   cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16);

#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);
#endif

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

406
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`.  Returns 0 when `block_size` <= 0.
 * The sum is accumulated in an int. */
int vp9_block_error_c(short *coeff, short *dqcoeff, int block_size) {
  const short *src = coeff;
  const short *rec = dqcoeff;
  int remaining;
  int sum_sq = 0;

  for (remaining = block_size; remaining > 0; --remaining) {
    const int delta = *src++ - *rec++;

    sum_sq += delta * delta;
  }

  return sum_sq;
}

417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
/* Sum of squared coefficient errors over blocks 0, 4, 8 and 12 of `mb`,
 * 64 coefficients each.  Coefficients at indices below `dc` are skipped in
 * every block (callers in this file pass a 0/1 flag). */
int vp9_mbblock_error_8x8_c(MACROBLOCK *mb, int dc) {
  int blk;
  int total = 0;

  for (blk = 0; blk < 16; blk += 4) {
    BLOCK *const be = &mb->block[blk];
    BLOCKD *const bd = &mb->e_mbd.block[blk];

    /* Per-block squared error over coefficient indices [dc, 64). */
    total += vp9_block_error_c(be->coeff + dc, bd->dqcoeff + dc, 64 - dc);
  }

  return total;
}

436
/* Sum of squared coefficient errors over blocks 0..15 of `mb`,
 * 16 coefficients each.  Coefficients at indices below `dc` are skipped in
 * every block (callers in this file pass a 0/1 flag). */
int vp9_mbblock_error_c(MACROBLOCK *mb, int dc) {
  int blk;
  int total = 0;

  for (blk = 0; blk < 16; blk++) {
    BLOCK *const be = &mb->block[blk];
    BLOCKD *const bd = &mb->e_mbd.block[blk];

    /* Per-block squared error over coefficient indices [dc, 16). */
    total += vp9_block_error_c(be->coeff + dc, bd->dqcoeff + dc, 16 - dc);
  }

  return total;
}

455
/* Sum of squared coefficient errors over blocks 16..23 of `mb`
 * (16 coefficients per block). */
int vp9_mbuverror_c(MACROBLOCK *mb) {
  int blk;
  int total = 0;

  for (blk = 16; blk < 24; blk++) {
    total += vp9_block_error_c(mb->block[blk].coeff,
                               mb->e_mbd.block[blk].dqcoeff, 16);
  }

  return total;
}

471
/* Returns the combined 8x8 SSE of the U and V planes for the macroblock's
 * first motion vector.
 *
 * The source pixels come from blocks 16 (U) and 20 (V) of `x`; the
 * prediction is read from xd->pre.u_buffer / v_buffer at an offset derived
 * from the motion vector.  The vector is moved one step away from zero and
 * then halved.  Its low three bits select between the sub-pixel and the
 * whole-pixel variance functions. */
int vp9_uvsse(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  unsigned char *const u_src = *(x->block[16].base_src) + x->block[16].src;
  unsigned char *const v_src = *(x->block[20].base_src) + x->block[20].src;
  const int src_stride = x->block[16].src_stride;
  const int pre_stride = xd->block[16].pre_stride;
  int mv_row = xd->mode_info_context->mbmi.mv[0].as_mv.row;
  int mv_col = xd->mode_info_context->mbmi.mv[0].as_mv.col;
  unsigned char *u_pre, *v_pre;
  unsigned int sse_u = 0;
  unsigned int sse_v = 0;
  int offset;

  /* Step one unit away from zero, then halve. */
  mv_row = (mv_row + ((mv_row < 0) ? -1 : 1)) / 2;
  mv_col = (mv_col + ((mv_col < 0) ? -1 : 1)) / 2;

  /* The bits above the low three give the whole-pixel displacement. */
  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
  u_pre = xd->pre.u_buffer + offset;
  v_pre = xd->pre.v_buffer + offset;

  if ((mv_row | mv_col) & 7) {
    /* Fractional component present: use the sub-pixel variance. */
    const int x_frac = (mv_col & 7) << 1;
    const int y_frac = (mv_row & 7) << 1;

    vp9_sub_pixel_variance8x8(u_pre, pre_stride, x_frac, y_frac,
                              u_src, src_stride, &sse_u);
    vp9_sub_pixel_variance8x8(v_pre, pre_stride, x_frac, y_frac,
                              v_src, src_stride, &sse_v);
  } else {
    vp9_variance8x8(u_pre, pre_stride, u_src, src_stride, &sse_u);
    vp9_variance8x8(v_pre, pre_stride, v_src, src_stride, &sse_v);
  }

  return sse_u + sse_v;
}

516
static int cost_coeffs_2x2(MACROBLOCK *mb,
517
                           BLOCKD *b, PLANE_TYPE type,
518
                           ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
519
  int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */
520
521
522
523
524
  int eob = b->eob;
  int pt;    /* surrounding block/prev coef predictor */
  int cost = 0;
  short *qcoeff_ptr = b->qcoeff;

525
  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
526
527
528
  assert(eob <= 4);

  for (; c < eob; c++) {
529
530
531
532
533
    int v = qcoeff_ptr[vp9_default_zig_zag1d[c]];
    int t = vp9_dct_value_tokens_ptr[v].Token;
    cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]][pt][t];
    cost += vp9_dct_value_cost_ptr[v];
    pt = vp9_prev_token_class[t];
534
535
536
  }

  if (c < 4)
537
    cost += mb->token_costs[TX_8X8][type][vp9_coef_bands[c]]
538
            [pt] [DCT_EOB_TOKEN];
539
540
  // is eob first coefficient;
  pt = (c > !type);
541
542
543
544
  *a = *l = pt;
  return cost;
}

545
static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, PLANE_TYPE type,
Daniel Kang's avatar
Daniel Kang committed
546
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
547
                       TX_SIZE tx_size) {
Daniel Kang's avatar
Daniel Kang committed
548
  const int eob = b->eob;
549
  int c = (type == PLANE_TYPE_Y_NO_DC); /* start at coef 0, unless Y with Y2 */
550
  int cost = 0, default_eob, seg_eob;
Daniel Kang's avatar
Daniel Kang committed
551
552
  int pt;                     /* surrounding block/prev coef predictor */
  int const *scan, *band;
John Koleszar's avatar
John Koleszar committed
553
  short *qcoeff_ptr = b->qcoeff;
554
555
556
  MACROBLOCKD *xd = &mb->e_mbd;
  MB_MODE_INFO *mbmi = &mb->e_mbd.mode_info_context->mbmi;
  TX_TYPE tx_type = DCT_DCT;
Paul Wilkins's avatar
Paul Wilkins committed
557
  int segment_id = mbmi->segment_id;
Jim Bankoski's avatar
Jim Bankoski committed
558
559
560
  scan = vp9_default_zig_zag1d;
  band = vp9_coef_bands;
  default_eob = 16;
561

562
  switch (tx_size) {
Daniel Kang's avatar
Daniel Kang committed
563
    case TX_4X4:
Deb Mukherjee's avatar
Deb Mukherjee committed
564
565
566
567
568
      if (type == PLANE_TYPE_Y_WITH_DC) {
        tx_type = get_tx_type_4x4(xd, b);
        if (tx_type != DCT_DCT) {
          switch (tx_type) {
            case ADST_DCT:
569
              scan = vp9_row_scan;
Deb Mukherjee's avatar
Deb Mukherjee committed
570
571
572
              break;

            case DCT_ADST:
573
              scan = vp9_col_scan;
Deb Mukherjee's avatar
Deb Mukherjee committed
574
575
576
              break;

            default:
577
              scan = vp9_default_zig_zag1d;
Deb Mukherjee's avatar
Deb Mukherjee committed
578
579
              break;
          }
580
        }
Daniel Kang's avatar
Daniel Kang committed
581
      }
Deb Mukherjee's avatar
Deb Mukherjee committed
582

Daniel Kang's avatar
Daniel Kang committed
583
584
      break;
    case TX_8X8:
585
586
      scan = vp9_default_zig_zag1d_8x8;
      band = vp9_coef_bands_8x8;
Daniel Kang's avatar
Daniel Kang committed
587
      default_eob = 64;
Deb Mukherjee's avatar
Deb Mukherjee committed
588
      if (type == PLANE_TYPE_Y_WITH_DC) {
589
        BLOCKD *bb;
590
        int ib = (int)(b - xd->block);
591
592
593
        if (ib < 16) {
          ib = (ib & 8) + ((ib & 4) >> 1);
          bb = xd->block + ib;
Deb Mukherjee's avatar
Deb Mukherjee committed
594
          tx_type = get_tx_type_8x8(xd, bb);
595
        }
596
      }
Daniel Kang's avatar
Daniel Kang committed
597
598
      break;
    case TX_16X16:
599
600
      scan = vp9_default_zig_zag1d_16x16;
      band = vp9_coef_bands_16x16;
Daniel Kang's avatar
Daniel Kang committed
601
      default_eob = 256;
Deb Mukherjee's avatar
Deb Mukherjee committed
602
603
      if (type == PLANE_TYPE_Y_WITH_DC) {
        tx_type = get_tx_type_16x16(xd, b);
604
605
606
607
608
609
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
      } else if (type == PLANE_TYPE_UV) {
        int ib = (int)(b - xd->block) - 16;

        qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * ib;
#endif
Deb Mukherjee's avatar
Deb Mukherjee committed
610
      }
Daniel Kang's avatar
Daniel Kang committed
611
      break;
612
613
614
615
616
617
618
619
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
      band = vp9_coef_bands_32x32;
      default_eob = 1024;
      qcoeff_ptr = xd->sb_coeff_data.qcoeff;
      break;
#endif
Daniel Kang's avatar
Daniel Kang committed
620
    default:
621
      abort();
Daniel Kang's avatar
Daniel Kang committed
622
623
      break;
  }
624
625
  if (vp9_segfeature_active(&mb->e_mbd, segment_id, SEG_LVL_EOB))
    seg_eob = vp9_get_segdata(&mb->e_mbd, segment_id, SEG_LVL_EOB);
626
627
628
  else
    seg_eob = default_eob;

629
  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
630

631
632
633
  if (tx_type != DCT_DCT) {
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
634
      int t = vp9_dct_value_tokens_ptr[v].Token;
635
      cost += mb->hybrid_token_costs[tx_size][type][band[c]][pt][t];
636
637
      cost += vp9_dct_value_cost_ptr[v];
      pt = vp9_prev_token_class[t];
638
639
640
641
    }
    if (c < seg_eob)
      cost += mb->hybrid_token_costs[tx_size][type][band[c]]
          [pt][DCT_EOB_TOKEN];
642
  } else {
643
644
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
645
      int t = vp9_dct_value_tokens_ptr[v].Token;
646
      cost += mb->token_costs[tx_size][type][band[c]][pt][t];
647
648
      cost += vp9_dct_value_cost_ptr[v];
      pt = vp9_prev_token_class[t];
649
650
651
652
    }
    if (c < seg_eob)
      cost += mb->token_costs[tx_size][type][band[c]]
          [pt][DCT_EOB_TOKEN];
653
654
  }

655
656
  // is eob first coefficient;
  pt = (c > !type);
657
658
659
660
  *a = *l = pt;
  return cost;
}

661
/* Returns the token cost of the luma plane of `mb` coded with 4x4
 * transforms: the sixteen Y blocks, plus block 24 (Y2) when
 * `has_2nd_order` is set.
 *
 * When `backup` is non-zero the entropy contexts are copied to locals
 * first, so the contexts held in the macroblock are left untouched.
 * Otherwise they are updated in place. */
static int rdcost_mby_4x4(MACROBLOCK *mb, int has_2nd_order, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  const PLANE_TYPE y_type =
      has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;
  int total = 0;
  int blk;

  if (backup) {
    /* Work on private copies of the contexts. */
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&above_copy;
    tl = (ENTROPY_CONTEXT *)&left_copy;
  }

  for (blk = 0; blk < 16; blk++) {
    total += cost_coeffs(mb, xd->block + blk, y_type,
                         ta + vp9_block2above[blk], tl + vp9_block2left[blk],
                         TX_4X4);
  }

  if (has_2nd_order) {
    total += cost_coeffs(mb, xd->block + 24, PLANE_TYPE_Y2,
                         ta + vp9_block2above[24], tl + vp9_block2left[24],
                         TX_4X4);
  }

  return total;
}

695
696
697
/* Computes rate, distortion and skippability of the luma plane of `mb`
 * when coded with 4x4 transforms.
 *
 * Sets the macroblock's txfm_size to TX_4X4, transforms and quantizes the
 * luma blocks, then writes:
 *   *Distortion - coefficient error (including block 24 when a second-order
 *                 block is in use), shifted right by 2;
 *   *Rate       - token cost from rdcost_mby_4x4();
 *   *skippable  - result of vp9_mby_is_skippable_4x4().
 * `backup` is forwarded to rdcost_mby_4x4(). */
static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int has_2nd_order;
  int err;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  has_2nd_order = get_2nd_order_usage(xd);

  /* Forward transform (builds the 2nd order block) and quantization. */
  vp9_transform_mby_4x4(mb);
  vp9_quantize_mby_4x4(mb);

  err = vp9_mbblock_error(mb, has_2nd_order);
  if (has_2nd_order) {
    err += vp9_block_error(mb->block[24].coeff, xd->block[24].dqcoeff, 16);
  }

  *Distortion = (err >> 2);
  *Rate = rdcost_mby_4x4(mb, has_2nd_order, backup);
  *skippable = vp9_mby_is_skippable_4x4(&mb->e_mbd, has_2nd_order);
}
John Koleszar's avatar
John Koleszar committed
718

719
/* Returns the token cost of the luma plane of `mb` coded with 8x8
 * transforms: blocks 0, 4, 8 and 12, plus block 24 (costed with
 * cost_coeffs_2x2) when `has_2nd_order` is set.
 *
 * When `backup` is non-zero the entropy contexts are copied to locals
 * first, so the contexts held in the macroblock are left untouched.
 * Otherwise they are updated in place. */
static int rdcost_mby_8x8(MACROBLOCK *mb, int has_2nd_order, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  const PLANE_TYPE y_type =
      has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;
  int total = 0;
  int blk;

  if (backup) {
    /* Work on private copies of the contexts. */
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&above_copy;
    tl = (ENTROPY_CONTEXT *)&left_copy;
  }

  for (blk = 0; blk < 16; blk += 4) {
    total += cost_coeffs(mb, xd->block + blk, y_type,
                         ta + vp9_block2above_8x8[blk],
                         tl + vp9_block2left_8x8[blk],
                         TX_8X8);
  }

  if (has_2nd_order) {
    total += cost_coeffs_2x2(mb, xd->block + 24, PLANE_TYPE_Y2,
                             ta + vp9_block2above[24],
                             tl + vp9_block2left[24]);
  }

  return total;
}

John Koleszar's avatar
John Koleszar committed
751
752
753
/* Evaluates rate, distortion and skippability of the luma plane of one
 * macroblock using the 8x8 transform.
 *
 * Sets the macroblock's txfm_size to TX_8X8, runs the forward transform and
 * quantizer, then reports:
 *   *Rate       - result of rdcost_mby_8x8().
 *   *Distortion - coefficient error (plus the block-24 error when the
 *                 second-order block is in use), shifted right by two.
 *   *skippable  - result of vp9_mby_is_skippable_8x8().
 * `backup` is forwarded unchanged to rdcost_mby_8x8().
 */
static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK *const y2_src = mb->block + 24;
  BLOCKD *const y2_rec = xd->block + 24;
  int use_y2;
  int err;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;

  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);

  /* Queried after txfm_size has been set to TX_8X8 above. */
  use_y2 = get_2nd_order_usage(xd);

  err = vp9_mbblock_error_8x8_c(mb, use_y2);
  if (use_y2) {
    err += vp9_block_error(y2_src->coeff, y2_rec->dqcoeff, 16);
  }
  *Distortion = err >> 2;

  /* Rate is computed before the skip check, in the original call order. */
  *Rate = rdcost_mby_8x8(mb, use_y2, backup);
  *skippable = vp9_mby_is_skippable_8x8(xd, use_y2);
}
774

775
/* Returns the coefficient token cost of the luma plane of one macroblock
 * coded as a single 16x16 transform block.
 *
 * When `backup` is non-zero the above/left entropy contexts are copied into
 * locals and the copies are handed to cost_coeffs(); otherwise the contexts
 * attached to the macroblock are passed directly.
 */
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *above;
  ENTROPY_CONTEXT *left;

  if (!backup) {
    above = (ENTROPY_CONTEXT *)xd->above_context;
    left = (ENTROPY_CONTEXT *)xd->left_context;
  } else {
    vpx_memcpy(&above_copy, xd->above_context, sizeof(above_copy));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(left_copy));
    above = (ENTROPY_CONTEXT *)&above_copy;
    left = (ENTROPY_CONTEXT *)&left_copy;
  }

  return cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, above, left,
                     TX_16X16);
}
795

Daniel Kang's avatar
Daniel Kang committed
796
/* Evaluates rate, distortion and skippability of the luma plane of one
 * macroblock using the 16x16 transform.
 *
 * Sets the macroblock's txfm_size to TX_16X16, runs the forward transform
 * and quantizer (followed by vp9_optimize_mby_16x16() for prediction modes
 * below I8X8_PRED), then reports:
 *   *Rate       - result of rdcost_mby_16x16().
 *   *Distortion - vp9_mbblock_error(mb, 0) shifted right by two.
 *   *skippable  - result of vp9_mby_is_skippable_16x16().
 * `backup` is forwarded unchanged to rdcost_mby_16x16().
 */
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
                                  int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int err;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_mby_16x16(mb);
  vp9_quantize_mby_16x16(mb);

  // TODO(jingning) is it possible to quickly determine whether to force
  //                trailing coefficients to be zero, instead of running trellis
  //                optimization in the rate-distortion optimization loop?
  if (xd->mode_info_context->mbmi.mode < I8X8_PRED) {
    vp9_optimize_mby_16x16(mb);
  }

  err = vp9_mbblock_error(mb, 0);
  *Distortion = err >> 2;

  /* Rate is computed before the skip check, in the original call order. */
  *Rate = rdcost_mby_16x16(mb, backup);
  *skippable = vp9_mby_is_skippable_16x16(xd);
}

818
/* Picks the transform size for the current block from per-size
 * rate/distortion/skip measurements and fills the per-mode RD cache.
 *
 * Inputs, all indexed by transform size from TX_4X4 up to max_txfm_size:
 *   r[n][0] - rate measured for size n (read).
 *   r[n][1] - written here: r[n][0] plus vp9_cost_zero()/vp9_cost_one()
 *             terms taken from cm->prob_tx[] (presumably the cost of
 *             signalling the size -- confirm against the bitstream writer).
 *   d[n]    - distortion for size n.
 *   s[n]    - non-zero when size n produced a skippable block.
 *
 * Outputs:
 *   mbmi->txfm_size - the chosen size, driven by cm->txfm_mode; under
 *                     TX_MODE_SELECT the size with the strictly lowest
 *                     rd[..][1] wins, larger sizes being tested first.
 *   *rate, *distortion, *skip - the entries of r/d/s for the chosen size;
 *                     *rate uses column 1 of r only in TX_MODE_SELECT.
 *   txfm_cache[]    - RD cost recorded for each transform mode.
 */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
  const int tx_select = (cm->txfm_mode == TX_MODE_SELECT);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int tx, bit;
  int wins_8x8, wins_16x16;
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
  int wins_32x32;
#endif

  /* Build r[tx][1]: one cost term per entry of cm->prob_tx[] walked for
   * this size.  The largest allowed size walks one entry fewer. */
  for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
    const int last_bit = tx - (tx == max_txfm_size);
    int rate_with_size = r[tx][0];

    for (bit = 0; bit <= last_bit; bit++) {
      rate_with_size += (bit == tx) ? vp9_cost_zero(cm->prob_tx[bit])
                                    : vp9_cost_one(cm->prob_tx[bit]);
    }
    r[tx][1] = rate_with_size;
  }

  if (cm->mb_no_coeff_skip) {
    int cost_coded, cost_skipped;

    assert(skip_prob > 0);
    cost_coded = vp9_cost_bit(skip_prob, 0);
    cost_skipped = vp9_cost_bit(skip_prob, 1);

    for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
      if (!s[tx]) {
        rd[tx][0] = RDCOST(x->rdmult, x->rddiv, r[tx][0] + cost_coded, d[tx]);
        rd[tx][1] = RDCOST(x->rdmult, x->rddiv, r[tx][1] + cost_coded, d[tx]);
      } else {
        /* Skippable: only the skip flag is charged, for both columns. */
        rd[tx][1] = RDCOST(x->rdmult, x->rddiv, cost_skipped, d[tx]);
        rd[tx][0] = rd[tx][1];
      }
    }
  } else {
    for (tx = TX_4X4; tx <= max_txfm_size; tx++) {
      rd[tx][0] = RDCOST(x->rdmult, x->rddiv, r[tx][0], d[tx]);
      rd[tx][1] = RDCOST(x->rdmult, x->rddiv, r[tx][1], d[tx]);
    }
  }

  /* Strict "less than" comparisons: ties fall through to the smaller size. */
  wins_8x8 = rd[TX_8X8][1] < rd[TX_4X4][1];
  wins_16x16 = rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
  /* rd[TX_32X32] is only filled when max_txfm_size allows it, so the size
   * check must short-circuit the comparisons. */
  wins_32x32 = max_txfm_size == TX_32X32 &&
               rd[TX_32X32][1] < rd[TX_16X16][1] &&
               rd[TX_32X32][1] < rd[TX_8X8][1] &&
               rd[TX_32X32][1] < rd[TX_4X4][1];
#endif

#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 || (tx_select && wins_32x32))) {
    mbmi->txfm_size = TX_32X32;
  } else
#endif
  if (cm->txfm_mode == ALLOW_16X16 ||
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
      (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
#endif
      (tx_select && wins_16x16)) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 || (tx_select && wins_8x8)) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
  if (wins_32x32) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else
#endif
  if (wins_16x16) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else {
    /* Smaller of the 4x4 and 8x8 costs. */
    txfm_cache[TX_MODE_SELECT] = wins_8x8 ? rd[TX_8X8][1] : rd[TX_4X4][1];
  }
}

/* Luma rate-distortion evaluation for one macroblock over the 4x4, 8x8 and
 * 16x16 transforms.
 *
 * Computes the luma residual, measures each transform size (16x16 first,
 * then 8x8, then 4x4, each with backup = 1), and lets
 * choose_txfm_size_from_rd() select the size and fill *rate, *distortion,
 * *skippable and txfm_cache[].
 */
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int rates[TX_SIZE_MAX_MB][2];
  int dists[TX_SIZE_MAX_MB];
  int skips[TX_SIZE_MAX_MB];

  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);

  /* Only column 0 of rates[] is filled here; column 1 is derived by
   * choose_txfm_size_from_rd(). */
  macro_block_yrd_16x16(x, &rates[TX_16X16][0], &dists[TX_16X16],
                        &skips[TX_16X16], 1);
  macro_block_yrd_8x8(x, &rates[TX_8X8][0], &dists[TX_8X8],
                      &skips[TX_8X8], 1);
  macro_block_yrd_4x4(x, &rates[TX_4X4][0], &dists[TX_4X4],
                      &skips[TX_4X4], 1);

  choose_txfm_size_from_rd(cpi, x, rates, rate, dists, distortion, skips,
                           skippable, txfm_cache, TX_16X16);
}

John Koleszar's avatar
John Koleszar committed
927
928
929
930
931
932
933
/* Copies a 4x4 block of bytes from `predictor` to `dst`.
 *
 * Both buffers are laid out with a 16-byte row pitch: row k of the block
 * occupies bytes [16 * k, 16 * k + 3].  Only those 16 bytes are written;
 * the remaining bytes of each destination row are left untouched.
 *
 * The copy is done byte by byte instead of through `unsigned int *` casts,
 * so it neither depends on sizeof(unsigned int) being 4 nor requires the
 * buffers to be int-aligned, and it does not break the strict-aliasing
 * rules.
 */
static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
  int row, col;

  for (row = 0; row < 4; row++) {
    const int base = row * 16;  /* 16-byte pitch between block rows */

    for (col = 0; col < 4; col++) {
      dst[base + col] = predictor[base + col];
    }
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
935

Ronald S. Bultje's avatar
Ronald S. Bultje committed
936
#if CONFIG_SUPERBLOCKS
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
#if CONFIG_TX32X32
/* Returns the coefficient token cost of the luma plane coded as one 32x32
 * transform block.  The above/left entropy contexts are copied into locals
 * first and the copies are what cost_coeffs() receives.
 */
static int rdcost_sby_32x32(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;

  vpx_memcpy(&above_copy, xd->above_context, sizeof(above_copy));
  vpx_memcpy(&left_copy, xd->left_context, sizeof(left_copy));

  return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC,
                     (ENTROPY_CONTEXT *)&above_copy,
                     (ENTROPY_CONTEXT *)&left_copy,
                     TX_32X32);
}

/* Sum of squared differences between the first `block_size` entries of
 * `coeff` and `dqcoeff`.
 *
 * The sum is accumulated in 64 bits and saturated to INT_MAX on return.
 * A non-positive `block_size` yields 0.
 */
static int vp9_sb_block_error_c(short *coeff, short *dqcoeff, int block_size) {
  int64_t sse = 0;
  int idx = 0;

  while (idx < block_size) {
    /* Widen before subtracting so the square cannot overflow. */
    const int64_t delta = (int64_t)coeff[idx] - (int64_t)dqcoeff[idx];

    sse += delta * delta;
    ++idx;
  }

  if (sse > INT_MAX) {
    return INT_MAX;
  }
  return (int)sse;
}

#define DEBUG_ERROR 0
/* Evaluates rate, distortion and skippability of the luma plane using the
 * 32x32 transform.
 *
 * Runs the 32x32 forward transform and quantizer, then reports:
 *   *distortion - without CONFIG_DWT32X32HYBRID: squared error between the
 *                 transform coefficients and the dequantized coefficients;
 *                 with it: error between the residual and the inverse
 *                 transformed output, shifted left by four.
 *   *rate       - result of rdcost_sby_32x32().
 *   *skippable  - result of vp9_sby_is_skippable_32x32().
 * With DEBUG_ERROR set, the inverse transform is also run and the
 * round-trip error is printed.
 */
static void super_block_yrd_32x32(MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  SUPERBLOCK *const src_sb = &x->sb_coeff_data;
  SUPERBLOCKD *const rec_sb = &xd->sb_coeff_data;
#if DEBUG_ERROR || CONFIG_DWT32X32HYBRID
  short out[1024];
#endif

  vp9_transform_sby_32x32(x);
  vp9_quantize_sby_32x32(x);
#if DEBUG_ERROR || CONFIG_DWT32X32HYBRID
  vp9_short_idct32x32(rec_sb->dqcoeff, out, 64);
#endif

#if CONFIG_DWT32X32HYBRID
  *distortion = vp9_block_error_c(src_sb->src_diff, out, 1024) << 4;
#else
  *distortion = vp9_sb_block_error_c(src_sb->coeff, rec_sb->dqcoeff, 1024);
#endif
#if DEBUG_ERROR
  printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
         vp9_block_error_c(src_sb->src_diff, out, 1024), *distortion);
#endif

  *rate = rdcost_sby_32x32(x);
  *skippable = vp9_sby_is_skippable_32x32(xd);
}
#endif

992
993
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
994
                            int *skip,
995
                            int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
996
  MACROBLOCKD *const xd = &x->e_mbd;
997
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
998
999
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
1000
1001
1002
1003
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
                        *orig_left = xd->left_context;
1004

1005
  for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
1006
1007
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
1008
    r[n][0] = 0;
1009
1010
1011
    d[n] = 0;
    s[n] = 1;
  }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1012

1013
1014
1015
1016
1017
1018
1019
1020
1021
#if CONFIG_TX32X32
  vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
                       dst, dst_y_stride);
  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
#endif

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1022
1023
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
1024
    int r_tmp, d_tmp, s_tmp;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1025

1026
    vp9_subtract_mby_s_c(x->src_diff,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
1027
1028
1029
1030
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);
1031
1032
1033

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
1034
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
1035
    d[TX_16X16] += d_tmp;
1036
    r[TX_16X16][0] += r_tmp;
1037
    s[TX_16X16] = s[TX_16X16] && s_tmp;
1038
1039
1040
1041
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
1042
1043
1044

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
1045
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
1046
    d[TX_4X4] += d_tmp;