vp9_rdopt.c 171 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
12
13
14
15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16
17
#include "vp9/common/vp9_pragmas.h"

18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
37

38
39
40
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
41
#include "vp9_rtcd.h"
42
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
43
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45
46
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

47
48
#define INVALID_MV 0x80008000

49
50
51
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
static const int auto_speed_thresh[17] = {
  1000,
  200,
  150,
  130,
  150,
  125,
  120,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  105
John Koleszar's avatar
John Koleszar committed
70
71
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73
74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76
77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79
80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85
86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88
89
90
91
92
93
94
95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99
100
101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103
104
105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

107
108
  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},
109

John Koleszar's avatar
John Koleszar committed
110
111
112
113
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115
116
117
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119
120
121
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123
124
125
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
126

John Koleszar's avatar
John Koleszar committed
127
128
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
148
149
};

150
151
152
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
                             int block_type_counts) {
John Koleszar's avatar
John Koleszar committed
153
154
155
156
157
158
  int i, j, k;

  for (i = 0; i < block_type_counts; i++)
    for (j = 0; j < COEF_BANDS; j++)
      for (k = 0; k < PREV_COEF_CONTEXTS; k++) {
        if (k == 0 && ((j > 0 && i > 0) || (j > 1 && i == 0)))
159
160
          vp9_cost_tokens_skip((int *)(c[i][j][k]),
                               p[i][j][k],
161
                               vp9_coef_tree);
John Koleszar's avatar
John Koleszar committed
162
        else
163
164
          vp9_cost_tokens((int *)(c[i][j][k]),
                          p[i][j][k],
165
                          vp9_coef_tree);
John Koleszar's avatar
John Koleszar committed
166
      }
John Koleszar's avatar
John Koleszar committed
167
168
}

169

170
171
172
173
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
174

175
// 3* dc_qlookup[Q]*dc_qlookup[Q];
176

177
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
178
179
180
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

181
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
182
183
184
185
186
187
188
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
189
190
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742);
John Koleszar's avatar
John Koleszar committed
191
  }
Paul Wilkins's avatar
Paul Wilkins committed
192
}
John Koleszar's avatar
John Koleszar committed
193

194
static int compute_rd_mult(int qindex) {
John Koleszar's avatar
John Koleszar committed
195
  int q;
196

197
  q = vp9_dc_quant(qindex, 0);
John Koleszar's avatar
John Koleszar committed
198
  return (11 * q * q) >> 6;
199
200
}

201
void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
John Koleszar's avatar
John Koleszar committed
202
203
  cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
  cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
John Koleszar's avatar
John Koleszar committed
204
205
}

206

207
void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
208
  int q, i;
John Koleszar's avatar
John Koleszar committed
209

210
  vp9_clear_system_state();  // __asm emms;
John Koleszar's avatar
John Koleszar committed
211

John Koleszar's avatar
John Koleszar committed
212
213
214
215
216
  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);
217

John Koleszar's avatar
John Koleszar committed
218
  cpi->RDMULT = compute_rd_mult(QIndex);
John Koleszar's avatar
John Koleszar committed
219

John Koleszar's avatar
John Koleszar committed
220
221
222
  // Extend rate multiplier along side quantizer zbin increases
  if (cpi->zbin_over_quant  > 0) {
    double oq_factor;
223

John Koleszar's avatar
John Koleszar committed
224
225
226
227
228
    // Experimental code using the same basic equation as used for Q above
    // The units of cpi->zbin_over_quant are 1/128 of Q bin size
    oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
    cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor);
  }
John Koleszar's avatar
John Koleszar committed
229

John Koleszar's avatar
John Koleszar committed
230
231
232
233
234
235
236
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
        (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
John Koleszar's avatar
John Koleszar committed
237

John Koleszar's avatar
John Koleszar committed
238
239
  if (cpi->RDMULT < 7)
    cpi->RDMULT = 7;
240

John Koleszar's avatar
John Koleszar committed
241
242
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
243

244
  vp9_set_speed_features(cpi);
John Koleszar's avatar
John Koleszar committed
245

246
  q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
John Koleszar's avatar
John Koleszar committed
247
248
  q = q << 2;
  cpi->RDMULT = cpi->RDMULT << 4;
Paul Wilkins's avatar
Paul Wilkins committed
249

John Koleszar's avatar
John Koleszar committed
250
251
  if (q < 8)
    q = 8;
252

John Koleszar's avatar
John Koleszar committed
253
254
255
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
John Koleszar's avatar
John Koleszar committed
256

John Koleszar's avatar
John Koleszar committed
257
258
259
260
261
262
    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
John Koleszar's avatar
John Koleszar committed
263

John Koleszar's avatar
John Koleszar committed
264
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
John Koleszar's avatar
John Koleszar committed
265
    }
John Koleszar's avatar
John Koleszar committed
266
267
  } else {
    cpi->RDDIV = 100;
John Koleszar's avatar
John Koleszar committed
268

John Koleszar's avatar
John Koleszar committed
269
270
271
272
273
274
    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
John Koleszar's avatar
John Koleszar committed
275

John Koleszar's avatar
John Koleszar committed
276
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
John Koleszar's avatar
John Koleszar committed
277
    }
John Koleszar's avatar
John Koleszar committed
278
  }
John Koleszar's avatar
John Koleszar committed
279

280
281
282
283
284
285
286
287
288
289
290
291
292
293
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES_4X4);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_4X4],
                   cpi->common.fc.hybrid_coef_probs_4x4, BLOCK_TYPES_4X4);

  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_8X8],
                   cpi->common.fc.hybrid_coef_probs_8x8, BLOCK_TYPES_8X8);

  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES_16X16);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16],
                   cpi->common.fc.hybrid_coef_probs_16x16, BLOCK_TYPES_16X16);
Daniel Kang's avatar
Daniel Kang committed
294

295
296
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);
297

John Koleszar's avatar
John Koleszar committed
298
299
  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
300
  vp9_init_mode_costs(cpi);
John Koleszar's avatar
John Koleszar committed
301

302
  if (cpi->common.frame_type != KEY_FRAME) {
303
    vp9_build_nmv_cost_table(
304
305
306
307
308
309
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
John Koleszar's avatar
John Koleszar committed
310
311
}

312
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
313
  int i, error = 0;
John Koleszar's avatar
John Koleszar committed
314

315
  for (i = 0; i < block_size; i++) {
John Koleszar's avatar
John Koleszar committed
316
317
318
    int this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }
John Koleszar's avatar
John Koleszar committed
319

John Koleszar's avatar
John Koleszar committed
320
  return error;
John Koleszar's avatar
John Koleszar committed
321
322
}

323
int vp9_mbblock_error_c(MACROBLOCK *mb) {
John Koleszar's avatar
John Koleszar committed
324
325
326
327
  BLOCK  *be;
  BLOCKD *bd;
  int i, j;
  int berror, error = 0;
John Koleszar's avatar
John Koleszar committed
328

John Koleszar's avatar
John Koleszar committed
329
330
331
332
  for (i = 0; i < 16; i++) {
    be = &mb->block[i];
    bd = &mb->e_mbd.block[i];
    berror = 0;
333
    for (j = 0; j < 16; j++) {
John Koleszar's avatar
John Koleszar committed
334
335
      int this_diff = be->coeff[j] - bd->dqcoeff[j];
      berror += this_diff * this_diff;
John Koleszar's avatar
John Koleszar committed
336
    }
John Koleszar's avatar
John Koleszar committed
337
338
339
    error += berror;
  }
  return error;
John Koleszar's avatar
John Koleszar committed
340
341
}

342
int vp9_mbuverror_c(MACROBLOCK *mb) {
John Koleszar's avatar
John Koleszar committed
343
344
  BLOCK  *be;
  BLOCKD *bd;
John Koleszar's avatar
John Koleszar committed
345

346
  int i, error = 0;
John Koleszar's avatar
John Koleszar committed
347

John Koleszar's avatar
John Koleszar committed
348
349
350
  for (i = 16; i < 24; i++) {
    be = &mb->block[i];
    bd = &mb->e_mbd.block[i];
John Koleszar's avatar
John Koleszar committed
351

352
    error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16);
John Koleszar's avatar
John Koleszar committed
353
  }
John Koleszar's avatar
John Koleszar committed
354

John Koleszar's avatar
John Koleszar committed
355
  return error;
John Koleszar's avatar
John Koleszar committed
356
357
}

358
int vp9_uvsse(MACROBLOCK *x) {
359
360
361
  uint8_t *uptr, *vptr;
  uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
  uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
John Koleszar's avatar
John Koleszar committed
362
363
364
365
  int uv_stride = x->block[16].src_stride;

  unsigned int sse1 = 0;
  unsigned int sse2 = 0;
366
367
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row;
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col;
John Koleszar's avatar
John Koleszar committed
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
  int offset;
  int pre_stride = x->e_mbd.block[16].pre_stride;

  if (mv_row < 0)
    mv_row -= 1;
  else
    mv_row += 1;

  if (mv_col < 0)
    mv_col -= 1;
  else
    mv_col += 1;

  mv_row /= 2;
  mv_col /= 2;

  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
  uptr = x->e_mbd.pre.u_buffer + offset;
  vptr = x->e_mbd.pre.v_buffer + offset;

  if ((mv_row | mv_col) & 7) {
389
    vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
390
                              (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
391
    vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
392
                              (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
John Koleszar's avatar
John Koleszar committed
393
394
    sse2 += sse1;
  } else {
395
396
    vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
    vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
John Koleszar's avatar
John Koleszar committed
397
398
399
    sse2 += sse1;
  }
  return sse2;
John Koleszar's avatar
John Koleszar committed
400
401
402

}

403
404
405
406
407
static INLINE int cost_coeffs(MACROBLOCK *mb,
                              BLOCKD *b, PLANE_TYPE type,
                              ENTROPY_CONTEXT *a,
                              ENTROPY_CONTEXT *l,
                              TX_SIZE tx_size) {
408
  int pt;
Daniel Kang's avatar
Daniel Kang committed
409
  const int eob = b->eob;
410
  MACROBLOCKD *xd = &mb->e_mbd;
411
  const int ib = (int)(b - xd->block);
412
  int c = 0;
413
414
  int cost = 0, seg_eob;
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
415
  const int *scan;
416
417
418
  int16_t *qcoeff_ptr = b->qcoeff;
  const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                          get_tx_type(xd, b) : DCT_DCT;
419
420
421
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      (tx_type == DCT_DCT) ? mb->token_costs[tx_size][type] :
                             mb->hybrid_token_costs[tx_size][type];
422
  ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
423

424
  switch (tx_size) {
Daniel Kang's avatar
Daniel Kang committed
425
    case TX_4X4:
426
427
      scan = vp9_default_zig_zag1d_4x4;
      seg_eob = 16;
Deb Mukherjee's avatar
Deb Mukherjee committed
428
      if (type == PLANE_TYPE_Y_WITH_DC) {
429
430
431
432
        if (tx_type == ADST_DCT) {
          scan = vp9_row_scan_4x4;
        } else if (tx_type == DCT_ADST) {
          scan = vp9_col_scan_4x4;
433
        }
Daniel Kang's avatar
Daniel Kang committed
434
435
436
      }
      break;
    case TX_8X8:
437
438
      scan = vp9_default_zig_zag1d_8x8;
      seg_eob = 64;
Daniel Kang's avatar
Daniel Kang committed
439
440
      break;
    case TX_16X16:
441
      scan = vp9_default_zig_zag1d_16x16;
442
443
444
445
      seg_eob = 256;
      if (type == PLANE_TYPE_UV) {
        const int uv_idx = ib - 16;
        qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
Deb Mukherjee's avatar
Deb Mukherjee committed
446
      }
Daniel Kang's avatar
Daniel Kang committed
447
      break;
448
449
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
450
      seg_eob = 1024;
451
452
      qcoeff_ptr = xd->sb_coeff_data.qcoeff;
      break;
Daniel Kang's avatar
Daniel Kang committed
453
    default:
454
      abort();
Daniel Kang's avatar
Daniel Kang committed
455
456
      break;
  }
457

458
459
  VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);

Paul Wilkins's avatar
Paul Wilkins committed
460
461
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
462

463
  if (tx_type != DCT_DCT) {
464
    int recent_energy = 0;
465
466
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
467
      int t = vp9_dct_value_tokens_ptr[v].Token;
468
      cost += token_costs[get_coef_band(c)][pt][t];
469
      cost += vp9_dct_value_cost_ptr[v];
470
      pt = vp9_get_coef_context(&recent_energy, t);
471
472
    }
    if (c < seg_eob)
473
      cost += mb->hybrid_token_costs[tx_size][type][get_coef_band(c)]
474
          [pt][DCT_EOB_TOKEN];
475
  } else {
476
    int recent_energy = 0;
477
478
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
479
      int t = vp9_dct_value_tokens_ptr[v].Token;
480
      cost += token_costs[get_coef_band(c)][pt][t];
481
      cost += vp9_dct_value_cost_ptr[v];
482
      pt = vp9_get_coef_context(&recent_energy, t);
483
484
    }
    if (c < seg_eob)
485
      cost += mb->token_costs[tx_size][type][get_coef_band(c)]
486
          [pt][DCT_EOB_TOKEN];
487
488
  }

489
  // is eob first coefficient;
490
  pt = (c > 0);
491
492
493
494
  *a = *l = pt;
  return cost;
}

495
static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) {
John Koleszar's avatar
John Koleszar committed
496
497
  int cost = 0;
  int b;
Paul Wilkins's avatar
Paul Wilkins committed
498
  MACROBLOCKD *xd = &mb->e_mbd;
John Koleszar's avatar
John Koleszar committed
499
500
501
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;
502

503
504
505
  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
John Koleszar's avatar
John Koleszar committed
506

507
508
509
510
511
512
    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }
John Koleszar's avatar
John Koleszar committed
513

John Koleszar's avatar
John Koleszar committed
514
  for (b = 0; b < 16; b++)
515
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
Yaowu Xu's avatar
Yaowu Xu committed
516
517
                        ta + vp9_block2above[TX_4X4][b],
                        tl + vp9_block2left[TX_4X4][b],
Daniel Kang's avatar
Daniel Kang committed
518
                        TX_4X4);
John Koleszar's avatar
John Koleszar committed
519

John Koleszar's avatar
John Koleszar committed
520
  return cost;
John Koleszar's avatar
John Koleszar committed
521
522
}

523
524
525
static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
526
                                int *skippable, int backup) {
Paul Wilkins's avatar
Paul Wilkins committed
527
  MACROBLOCKD *const xd = &mb->e_mbd;
528

529
530
531
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_mby_4x4(mb);
  vp9_quantize_mby_4x4(mb);
532

533
534
535
  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_4x4(mb, backup);
  *skippable = vp9_mby_is_skippable_4x4(xd);
536
}
John Koleszar's avatar
John Koleszar committed
537

538
static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
John Koleszar's avatar
John Koleszar committed
539
540
  int cost = 0;
  int b;
Paul Wilkins's avatar
Paul Wilkins committed
541
  MACROBLOCKD *xd = &mb->e_mbd;
John Koleszar's avatar
John Koleszar committed
542
543
544
545
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

Ronald S. Bultje's avatar
Ronald S. Bultje committed
546
547
548
  if (backup) {
    vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
John Koleszar's avatar
John Koleszar committed
549

Ronald S. Bultje's avatar
Ronald S. Bultje committed
550
551
552
553
554
555
    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
    tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
  }
John Koleszar's avatar
John Koleszar committed
556
557

  for (b = 0; b < 16; b += 4)
558
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
Yaowu Xu's avatar
Yaowu Xu committed
559
560
                        ta + vp9_block2above[TX_8X8][b],
                        tl + vp9_block2left[TX_8X8][b],
Daniel Kang's avatar
Daniel Kang committed
561
                        TX_8X8);
John Koleszar's avatar
John Koleszar committed
562
563

  return cost;
564
565
}

John Koleszar's avatar
John Koleszar committed
566
567
568
static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
569
                                int *skippable, int backup) {
Paul Wilkins's avatar
Paul Wilkins committed
570
  MACROBLOCKD *const xd = &mb->e_mbd;
571
572

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
573
574
  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);
John Koleszar's avatar
John Koleszar committed
575

576
577
578
  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_8x8(mb, backup);
  *skippable = vp9_mby_is_skippable_8x8(xd);
579
}
580

581
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
Daniel Kang's avatar
Daniel Kang committed
582
  int cost;
Paul Wilkins's avatar
Paul Wilkins committed
583
  MACROBLOCKD *xd = &mb->e_mbd;
Daniel Kang's avatar
Daniel Kang committed
584
585
586
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;

587
588
589
  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
Daniel Kang's avatar
Daniel Kang committed
590

591
592
593
594
595
596
    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }
Daniel Kang's avatar
Daniel Kang committed
597

Paul Wilkins's avatar
Paul Wilkins committed
598
  cost = cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
Daniel Kang's avatar
Daniel Kang committed
599
600
  return cost;
}
601

Daniel Kang's avatar
Daniel Kang committed
602
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
603
                                  int *skippable, int backup) {
Deb Mukherjee's avatar
Deb Mukherjee committed
604
  MACROBLOCKD *xd = &mb->e_mbd;
Daniel Kang's avatar
Daniel Kang committed
605

606
607
  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_mby_16x16(mb);
608
  vp9_quantize_mby_16x16(mb);
609
610
611
  // TODO(jingning) is it possible to quickly determine whether to force
  //                trailing coefficients to be zero, instead of running trellis
  //                optimization in the rate-distortion optimization loop?
612
613
  if (mb->optimize &&
      xd->mode_info_context->mbmi.mode < I8X8_PRED)
614
    vp9_optimize_mby_16x16(mb);
615

616
  *Distortion = vp9_mbblock_error(mb) >> 2;
617
  *Rate = rdcost_mby_16x16(mb, backup);
618
  *skippable = vp9_mby_is_skippable_16x16(xd);
Daniel Kang's avatar
Daniel Kang committed
619
620
}

621
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
622
623
624
625
626
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
627
628
629
630
631
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
632
633
634
635
636
637
638
639
640
641
642
643
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }
644
645
646
647
648
649
650
651

  if (cm->mb_no_coeff_skip) {
    int s0, s1;

    assert(skip_prob > 0);
    s0 = vp9_cost_bit(skip_prob, 0);
    s1 = vp9_cost_bit(skip_prob, 1);

652
    for (n = TX_4X4; n <= max_txfm_size; n++) {
653
      if (s[n]) {
654
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
655
      } else {
656
657
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
658
      }
659
660
    }
  } else {
661
662
663
    for (n = TX_4X4; n <= max_txfm_size; n++) {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]);
664
665
666
    }
  }

667
668
669
670
671
672
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
673
674
675
676
677
  } else if ( cm->txfm_mode == ALLOW_16X16 ||
             (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
             (cm->txfm_mode == TX_MODE_SELECT &&
              rd[TX_16X16][1] < rd[TX_8X8][1] &&
              rd[TX_16X16][1] < rd[TX_4X4][1])) {
678
    mbmi->txfm_size = TX_16X16;
679
  } else if (cm->txfm_mode == ALLOW_8X8 ||
680
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
681
682
    mbmi->txfm_size = TX_8X8;
  } else {
683
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
684
685
686
    mbmi->txfm_size = TX_4X4;
  }

687
  *distortion = d[mbmi->txfm_size];
688
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
689
690
  *skip       = s[mbmi->txfm_size];

691
692
693
694
695
696
697
698
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
699
  else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
700
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
701
  else
702
703
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
704
705
706
707
708
709
}

static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
710
  int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB];
711
712
713

  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);
714

715
716
717
  macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1);
  macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1);
  macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1);
718
719

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
720
                           txfm_cache, TX_16X16);
721
722
}

723
static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
John Koleszar's avatar
John Koleszar committed
724
725
726
727
728
729
  const unsigned int *p = (const unsigned int *)predictor;
  unsigned int *d = (unsigned int *)dst;
  d[0] = p[0];
  d[4] = p[4];
  d[8] = p[8];
  d[12] = p[12];
730
}
Yaowu Xu's avatar
Yaowu Xu committed
731

Ronald S. Bultje's avatar
Ronald S. Bultje committed
732
static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
733
734
  MACROBLOCKD * const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
735
  ENTROPY_CONTEXT *ta, *tl;
736

Ronald S. Bultje's avatar
Ronald S. Bultje committed
737
738
739
740
741
742
743
744
745
746
  if (backup) {
    ta = (ENTROPY_CONTEXT *) &t_above,
    tl = (ENTROPY_CONTEXT *) &t_left;

    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left,  xd->left_context,  sizeof(ENTROPY_CONTEXT_PLANES));
  } else {
    ta = (ENTROPY_CONTEXT *) xd->above_context;
    tl = (ENTROPY_CONTEXT *) xd->left_context;
  }
747
748
749
750

  return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
}

751
752
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                                int block_size) {
753
754
755
756
757
758
759
760
761
762
763
764
765
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    unsigned int this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : error;
}

#define DEBUG_ERROR 0
static void super_block_yrd_32x32(MACROBLOCK *x,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
766
767
                                  int *rate, int *distortion, int *skippable,
                                  int backup) {
768
769
770
  SUPERBLOCK  * const x_sb = &x->sb_coeff_data;
  MACROBLOCKD * const xd = &x->e_mbd;
  SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
771
#if DEBUG_ERROR
772
  int16_t out[1024];
773
774
775
776
#endif

  vp9_transform_sby_32x32(x);
  vp9_quantize_sby_32x32(x);
777
#if DEBUG_ERROR
778
779
780
781
  vp9_short_idct32x32(xd_sb->dqcoeff, out, 64);
#endif

  *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024);
782

783
784
785
786
#if DEBUG_ERROR
  printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
         vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
787
  *rate       = rdcost_sby_32x32(x, backup);
788
789
790
  *skippable  = vp9_sby_is_skippable_32x32(&x->e_mbd);
}

791
792
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
793
                            int *skip,
794
                            int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
795
  MACROBLOCKD *const xd = &x->e_mbd;
796
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
797
798
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
799
800
801
802
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
                        *orig_left = xd->left_context;
803

804
  for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
805
806
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
807
    r[n][0] = 0;
808
809
810
    d[n] = 0;
    s[n] = 1;
  }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
811

812
813
  vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
                       dst, dst_y_stride);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
814
  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
815
816
817
818

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
819
820
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
821
    int r_tmp, d_tmp, s_tmp;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
822

823
    vp9_subtract_mby_s_c(x->src_diff,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
824
825
826
827
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);
828
829
830

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
831
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
832
    d[TX_16X16] += d_tmp;
833
    r[TX_16X16][0] += r_tmp;
834
    s[TX_16X16] = s[TX_16X16] && s_tmp;
835
836
837
838
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
839
840
841

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
842
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
843
    d[TX_4X4] += d_tmp;
844
    r[TX_4X4][0] += r_tmp;
845
    s[TX_4X4] = s[TX_4X4] && s_tmp;
846
847
848
849
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
850
851
852

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
853
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
854
    d[TX_8X8] += d_tmp;
855
    r[TX_8X8][0] += r_tmp;
856
    s[TX_8X8] = s[TX_8X8] && s_tmp;
857
858
859
860
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
861
  }
862
863
864
865
866
867
868
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);
869
870
871

  xd->above_context = orig_above;
  xd->left_context = orig_left;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
872
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968

static void super_block_64_yrd(VP9_COMP *cpi,
                               MACROBLOCK *x, int *rate, int *distortion,
                               int *skip,
                               int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4],
                        *orig_left = xd->left_context;

  for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) {
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
    r[n][0] = 0;
    d[n] = 0;
    s[n] = 1;
  }

  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
    int r_tmp, d_tmp, s_tmp;

    xd->above_context = &t_above[TX_32X32][x_idx << 1];
    xd->left_context = &t_left[TX_32X32][y_idx << 1];
    vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
                         src + 32 * x_idx + 32 * y_idx * src_y_stride,
                         src_y_stride,
                         dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
                         dst_y_stride);
    super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0);
    r[TX_32X32][0] += r_tmp;
    d[TX_32X32] += d_tmp;
    s[TX_32X32] = s[TX_32X32] && s_tmp;
  }

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
  for (n = 0; n < 16; n++) {
    int x_idx = n & 3, y_idx = n >> 2;
    int r_tmp, d_tmp, s_tmp;

    vp9_subtract_mby_s_c(x->src_diff,
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_16X16] += d_tmp;
    r[TX_16X16][0] += r_tmp;
    s[TX_16X16] = s[TX_16X16] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_4X4] += d_tmp;
    r[TX_4X4][0] += r_tmp;
    s[TX_4X4] = s[TX_4X4] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_8X8] += d_tmp;
    r[TX_8X8][0] += r_tmp;
    s[TX_8X8] = s[TX_8X8] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
  }
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);

  xd->above_context = orig_above;
  xd->left_context = orig_left;
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
969

970
static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
John Koleszar's avatar
John Koleszar committed
971
972
973