/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
16 17
#include "vp9/common/vp9_pragmas.h"

18 19 20 21 22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23 24 25 26 27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28 29 30 31 32 33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35 36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
37

38 39 40
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
41
#include "vp9_rtcd.h"
42
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
43
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45 46
/* Larger of the two arguments (yields b when they compare equal).
 * Function-like macro: either argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

/* Sentinel bit pattern for a motion vector.
 * NOTE(review): presumably marks "no valid MV"; confirm against its users. */
#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/* Threshold table with 17 entries (indices 0..16).
 * NOTE(review): presumably indexed by the encoder speed setting; no user of
 * this table is visible in this part of the file. */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, 150, 125, 120, 115, 115,
  115,  115, 115, 115, 115, 115, 115, 105
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73 74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76 77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79 80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82 83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85 86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88 89 90 91 92 93 94 95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99 100 101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103 104 105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

107 108
  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},
109

John Koleszar's avatar
John Koleszar committed
110 111 112 113
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115 116 117
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119 120 121
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123 124 125
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
126

John Koleszar's avatar
John Koleszar committed
127 128
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
148 149
};

150 151 152
/* Convert coefficient token probabilities into token costs.
 *
 * c                  destination cost tables, indexed
 *                    [block type][ref type][coef band][previous context]
 * p                  source probabilities, indexed the same way
 * block_type_counts  number of block types to process
 *
 * Entries with previous context 0 in any band other than band 0 are built
 * with vp9_cost_tokens_skip(); all others with vp9_cost_tokens(). */
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
                             int block_type_counts) {
  int type, ref, band, ctx;

  for (type = 0; type < block_type_counts; ++type) {
    for (ref = 0; ref < REF_TYPES; ++ref) {
      for (band = 0; band < COEF_BANDS; ++band) {
        for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
          int *const costs = (int *)(c[type][ref][band][ctx]);
          const int use_skip_variant = (ctx == 0) && (band > 0);

          if (use_skip_variant) {
            vp9_cost_tokens_skip(costs, p[type][ref][band][ctx],
                                 vp9_coef_tree);
          } else {
            vp9_cost_tokens(costs, p[type][ref][band][ctx],
                            vp9_coef_tree);
          }
        }
      }
    }
  }
}

170

171 172 173 174
/* RDMULT boost factors in 1/16 units, looked up by the two-pass
 * next_iiratio value (see vp9_initialize_rd_consts).  Only the first five
 * entries are non-zero; the remaining 27 are zero-initialized. */
static int rd_iifactor[32] = {
  4, 4, 3, 2, 1
};
John Koleszar's avatar
John Koleszar committed
175

176
// 3* dc_qlookup[Q]*dc_qlookup[Q];
177

178
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
179 180 181
/* SAD-per-bit lookup tables, one entry per quantizer index.  Filled by
 * vp9_init_me_luts() and read by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/* Populate both SAD-per-bit tables for every quantizer index. */
void vp9_init_me_luts() {
  int qindex = 0;

  // The tables are derived from a linear formula of the real quantizer
  // value for now.  This makes it easier to resolve the impact of
  // experimental changes to the quantizer tables.
  while (qindex < QINDEX_RANGE) {
    sad_per_bit16lut[qindex] =
        (int)((0.0418 * vp9_convert_qindex_to_q(qindex)) + 2.4107);
    sad_per_bit4lut[qindex] =
        (int)((0.063 * vp9_convert_qindex_to_q(qindex)) + 2.742);
    ++qindex;
  }
}
John Koleszar's avatar
John Koleszar committed
194

195
/* Rate-distortion multiplier for a quantizer index:
 * (11 * dc_q^2) >> 6, where dc_q = vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int dc_q_squared = dc_q * dc_q;

  return (11 * dc_q_squared) >> 6;
}

202
/* Load the SAD-per-bit constants for QIndex into the encoder's macroblock
 * state.  QIndex is used directly as a table index and is not range-checked
 * here. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
  const int per_bit_16 = sad_per_bit16lut[QIndex];
  const int per_bit_4 = sad_per_bit4lut[QIndex];

  cpi->mb.sadperbit16 = per_bit_16;
  cpi->mb.sadperbit4 = per_bit_4;
}

207

208
/* Set up the rate-distortion constants for a frame coded at QIndex.
 *
 * Writes cpi->RDMULT, cpi->RDDIV, cpi->mb.errorperbit, the per-mode
 * rd_threshes / rd_baseline_thresh arrays, the coefficient token cost
 * tables for every transform size, the mode costs and (for non-key frames)
 * the motion-vector cost tables.
 *
 * cpi     encoder instance to update
 * QIndex  quantizer index; clamped to [0, MAXQ] before use
 */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);

  cpi->RDMULT = compute_rd_mult(QIndex);

  // Second pass, non-key frames: boost RDMULT by a factor looked up from
  // next_iiratio (values above 31 use the last table entry).
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const int iiratio =
        (cpi->twopass.next_iiratio > 31) ? 31 : cpi->twopass.next_iiratio;
    cpi->RDMULT += (cpi->RDMULT * rd_iifactor[iiratio]) >> 4;
  }

  if (cpi->RDMULT < 7)
    cpi->RDMULT = 7;

  // errorperbit is RDMULT / 110, but never zero.
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  // Must run before the thresholds below: they read cpi->sf.thresh_mult.
  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
  q = q << 2;
  cpi->RDMULT = cpi->RDMULT << 4;

  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in 64 bits: thresh_mult * q can exceed INT_MAX even
        // though thresh_mult itself does not.  Saturate the result.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = (scaled > INT_MAX) ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  // Token cost tables, one call per transform size.
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

294
/* Sum of squared differences between two coefficient arrays.
 *
 * coeff       original coefficients (block_size entries)
 * dqcoeff     dequantized coefficients (block_size entries)
 * block_size  number of coefficients to compare
 *
 * Returns the sum of (coeff[i] - dqcoeff[i])^2.  The sum is accumulated in
 * 64 bits and saturated at INT_MAX, so large differences cannot overflow
 * a signed int; results below INT_MAX are unchanged. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return (error > INT_MAX) ? INT_MAX : (int)error;
}

305
/* Sum of squared coefficient errors over blocks 0..15 of a macroblock,
 * 16 coefficients per block (coeff versus dqcoeff). */
int vp9_mbblock_error_c(MACROBLOCK *mb) {
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    BLOCK *const src_blk = &mb->block[blk];
    BLOCKD *const rec_blk = &mb->e_mbd.block[blk];
    int block_sse = 0;
    int k;

    for (k = 0; k < 16; ++k) {
      const int diff = src_blk->coeff[k] - rec_blk->dqcoeff[k];
      block_sse += diff * diff;
    }

    total += block_sse;
  }

  return total;
}

324
/* Sum of squared coefficient errors over blocks 16..23 of a macroblock,
 * 16 coefficients per block, via vp9_block_error_c(). */
int vp9_mbuverror_c(MACROBLOCK *mb) {
  int total = 0;
  int blk = 16;

  while (blk < 24) {
    total += vp9_block_error_c(mb->block[blk].coeff,
                               mb->e_mbd.block[blk].dqcoeff, 16);
    ++blk;
  }

  return total;
}

340
/* Sum of the U and V 8x8 SSE values between the source blocks (block 16
 * for U, block 20 for V) and the prediction reference at the macroblock's
 * first motion vector.  The vector is rounded away from zero and halved
 * before use; if it has a fractional part (low three bits set) the
 * sub-pixel variance functions are used, otherwise the whole-pixel ones. */
int vp9_uvsse(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *const u_src = (*(x->block[16].base_src) + x->block[16].src);
  uint8_t *const v_src = (*(x->block[20].base_src) + x->block[20].src);
  const int src_stride = x->block[16].src_stride;
  const int ref_stride = xd->block[16].pre_stride;
  int mv_row = xd->mode_info_context->mbmi.mv[0].as_mv.row;
  int mv_col = xd->mode_info_context->mbmi.mv[0].as_mv.col;
  unsigned int sse_u = 0;
  unsigned int sse_v = 0;
  uint8_t *u_ref, *v_ref;
  int ref_offset;

  // Round away from zero, then halve.
  mv_row += (mv_row < 0) ? -1 : 1;
  mv_col += (mv_col < 0) ? -1 : 1;
  mv_row /= 2;
  mv_col /= 2;

  // Whole-pixel part of the vector selects the reference position.
  ref_offset = (mv_row >> 3) * ref_stride + (mv_col >> 3);
  u_ref = xd->pre.u_buffer + ref_offset;
  v_ref = xd->pre.v_buffer + ref_offset;

  if ((mv_row | mv_col) & 7) {
    const int x_frac = (mv_col & 7) << 1;
    const int y_frac = (mv_row & 7) << 1;

    vp9_sub_pixel_variance8x8(u_ref, ref_stride, x_frac, y_frac,
                              u_src, src_stride, &sse_u);
    vp9_sub_pixel_variance8x8(v_ref, ref_stride, x_frac, y_frac,
                              v_src, src_stride, &sse_v);
  } else {
    vp9_variance8x8(u_ref, ref_stride, u_src, src_stride, &sse_u);
    vp9_variance8x8(v_ref, ref_stride, v_src, src_stride, &sse_v);
  }

  sse_u += sse_v;
  return sse_u;
}

385 386 387 388 389
/* Cost of coding the quantized coefficients of one transform block.
 *
 * mb       encoder macroblock state (supplies the token cost tables)
 * b        block whose coefficients are costed
 * type     plane type, used to select the cost table and the scan
 * a, l     above / left entropy contexts; read to form the initial
 *          context and both overwritten with (eob > 0) on return
 * tx_size  transform size; any value other than the four handled aborts
 *
 * Returns the accumulated token cost, including the end-of-block token
 * when the block ends before the segment's maximum position. */
static INLINE int cost_coeffs(MACROBLOCK *mb,
                              BLOCKD *b, PLANE_TYPE type,
                              ENTROPY_CONTEXT *a,
                              ENTROPY_CONTEXT *l,
                              TX_SIZE tx_size) {
  MACROBLOCKD *xd = &mb->e_mbd;
  const int eob = b->eob;
  const int block_idx = (int)(b - xd->block);
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
  const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
  const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                          get_tx_type(xd, b) : DCT_DCT;
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs[tx_size][type][ref];
  ENTROPY_CONTEXT above_ec = *a, left_ec = *l;
  int16_t *qcoeff_ptr = b->qcoeff;
  const int *scan;
  int seg_eob;
  int pt;
  int pos = 0;
  int cost = 0;
  int recent_energy = 0;

  // Pick the scan order, the maximum number of coefficients and, for the
  // larger transforms, the buffer the coefficients live in.
  switch (tx_size) {
    case TX_4X4:
      seg_eob = 16;
      // tx_type is DCT_DCT unless this is a Y-with-DC block, so the hybrid
      // scans can only be selected for that plane type.
      if (tx_type == ADST_DCT) {
        scan = vp9_row_scan_4x4;
      } else if (tx_type == DCT_ADST) {
        scan = vp9_col_scan_4x4;
      } else {
        scan = vp9_default_zig_zag1d_4x4;
      }
      break;
    case TX_8X8:
      seg_eob = 64;
      scan = vp9_default_zig_zag1d_8x8;
      break;
    case TX_16X16:
      seg_eob = 256;
      scan = vp9_default_zig_zag1d_16x16;
      if (type == PLANE_TYPE_UV) {
        // 16x16 chroma coefficients are read from the superblock buffer,
        // after the first 1024 entries.
        const int uv_idx = block_idx - 16;
        qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
      }
      break;
    case TX_32X32:
      seg_eob = 1024;
      scan = vp9_default_zig_zag1d_32x32;
      qcoeff_ptr = xd->sb_coeff_data.qcoeff;
      break;
    default:
      abort();
      break;
  }

  VP9_COMBINEENTROPYCONTEXTS(pt, above_ec, left_ec);

  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;

  // Walk the coefficients in scan order up to the end of block.
  while (pos < eob) {
    const int v = qcoeff_ptr[scan[pos]];
    const int t = vp9_dct_value_tokens_ptr[v].Token;

    cost += token_costs[get_coef_band(tx_size, pos)][pt][t];
    cost += vp9_dct_value_cost_ptr[v];
    pt = vp9_get_coef_context(&recent_energy, t);
    ++pos;
  }

  // An explicit end-of-block token is only costed when the block stops
  // short of the segment's limit.
  if (pos < seg_eob)
    cost += token_costs[get_coef_band(tx_size, pos)][pt][DCT_EOB_TOKEN];

  // is eob first coefficient;
  pt = (pos > 0);
  *a = *l = pt;
  return cost;
}

465
/* Token cost of the 16 luma 4x4 blocks of a macroblock.
 * When backup is non-zero the entropy contexts are copied first and only
 * the copies are updated; otherwise the live contexts are updated. */
static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;
  int total = 0;
  int blk;

  if (backup) {
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&above_copy;
    tl = (ENTROPY_CONTEXT *)&left_copy;
  }

  for (blk = 0; blk < 16; ++blk) {
    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_Y_WITH_DC,
                         ta + vp9_block2above[TX_4X4][blk],
                         tl + vp9_block2left[TX_4X4][blk],
                         TX_4X4);
  }

  return total;
}

493 494 495
/* Luma rate/distortion of a macroblock coded with 4x4 transforms.
 * Sets the macroblock's txfm_size to TX_4X4, transforms and quantizes the
 * luma residual, then reports:
 *   *Rate        token cost (see rdcost_mby_4x4 for the meaning of backup)
 *   *Distortion  coefficient error divided by 4
 *   *skippable   result of vp9_mby_is_skippable_4x4() */
static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int coeff_error;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_mby_4x4(mb);
  vp9_quantize_mby_4x4(mb);

  coeff_error = vp9_mbblock_error(mb);
  *Distortion = coeff_error >> 2;
  *Rate = rdcost_mby_4x4(mb, backup);
  *skippable = vp9_mby_is_skippable_4x4(xd);
}
John Koleszar's avatar
John Koleszar committed
507

508
/* Token cost of the four luma 8x8 blocks of a macroblock (block indices
 * 0, 4, 8 and 12).  When backup is non-zero the entropy contexts are
 * copied first and only the copies are updated. */
static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;
  int total = 0;
  int blk;

  if (backup) {
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&above_copy;
    tl = (ENTROPY_CONTEXT *)&left_copy;
  }

  for (blk = 0; blk < 16; blk += 4) {
    total += cost_coeffs(mb, xd->block + blk, PLANE_TYPE_Y_WITH_DC,
                         ta + vp9_block2above[TX_8X8][blk],
                         tl + vp9_block2left[TX_8X8][blk],
                         TX_8X8);
  }

  return total;
}

John Koleszar's avatar
John Koleszar committed
536 537 538
/* Luma rate/distortion of a macroblock coded with 8x8 transforms.
 * Sets the macroblock's txfm_size to TX_8X8, transforms and quantizes the
 * luma residual, then reports:
 *   *Rate        token cost (see rdcost_mby_8x8 for the meaning of backup)
 *   *Distortion  coefficient error divided by 4
 *   *skippable   result of vp9_mby_is_skippable_8x8() */
static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int coeff_error;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);

  coeff_error = vp9_mbblock_error(mb);
  *Distortion = coeff_error >> 2;
  *Rate = rdcost_mby_8x8(mb, backup);
  *skippable = vp9_mby_is_skippable_8x8(xd);
}
550

551
/* Token cost of the single luma 16x16 block of a macroblock.  When backup
 * is non-zero the entropy contexts are copied first and only the copies
 * are updated. */
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;

  if (backup) {
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&above_copy;
    tl = (ENTROPY_CONTEXT *)&left_copy;
  }

  return cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
}
571

Daniel Kang's avatar
Daniel Kang committed
572
/* Luma rate/distortion of a macroblock coded with one 16x16 transform.
 * Sets the macroblock's txfm_size to TX_16X16, transforms and quantizes
 * the luma residual, optionally runs the coefficient optimizer, then
 * reports:
 *   *Rate        token cost (see rdcost_mby_16x16 for the meaning of backup)
 *   *Distortion  coefficient error divided by 4
 *   *skippable   result of vp9_mby_is_skippable_16x16() */
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
                                  int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  int coeff_error;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_mby_16x16(mb);
  vp9_quantize_mby_16x16(mb);

  // TODO(jingning) is it possible to quickly determine whether to force
  //                trailing coefficients to be zero, instead of running trellis
  //                optimization in the rate-distortion optimization loop?
  if (mb->optimize) {
    if (xd->mode_info_context->mbmi.mode < I8X8_PRED) {
      vp9_optimize_mby_16x16(mb);
    }
  }

  coeff_error = vp9_mbblock_error(mb);
  *Distortion = coeff_error >> 2;
  *Rate = rdcost_mby_16x16(mb, backup);
  *skippable = vp9_mby_is_skippable_16x16(xd);
}

591
/* Pick the transform size for a block from per-size rate/distortion data.
 *
 * r              per-size rates; r[n][0] is the input rate, r[n][1] is
 *                filled in here as r[n][0] plus the cost of signalling
 *                size n
 * d, s           per-size distortion and skip flags (inputs)
 * rate, distortion, skip
 *                outputs for the chosen size; the rate includes the size
 *                signalling cost only in TX_MODE_SELECT
 * txfm_cache     output: RD cost that each frame-level transform mode
 *                would give for this block
 * max_txfm_size  largest transform size that has valid entries in r/d/s
 *
 * The chosen size is also stored in the block's mbmi->txfm_size. */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
  int64_t rd[TX_SIZE_MAX_SB][2];
  int best_is_32, best_is_16;
  int n, m;

  // Rate including the bits that signal each transform size.  The largest
  // allowed size needs no terminating zero bit.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    const int last_bit = n - (n == max_txfm_size);

    r[n][1] = r[n][0];
    for (m = 0; m <= last_bit; m++) {
      if (m != n)
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
    }
  }

  // RD cost per size, without ([0]) and with ([1]) the size signalling.
  if (cm->mb_no_coeff_skip) {
    int s0, s1;

    assert(skip_prob > 0);
    s0 = vp9_cost_bit(skip_prob, 0);
    s1 = vp9_cost_bit(skip_prob, 1);

    for (n = TX_4X4; n <= max_txfm_size; n++) {
      if (!s[n]) {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
      } else {
        // Skipped block: only the skip flag is paid for.
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
      }
    }
  } else {
    for (n = TX_4X4; n <= max_txfm_size; n++) {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]);
    }
  }

  // rd[TX_32X32] only holds data when 32x32 is allowed, so it is read only
  // behind the max_txfm_size test.
  best_is_32 = max_txfm_size == TX_32X32 &&
               rd[TX_32X32][1] < rd[TX_16X16][1] &&
               rd[TX_32X32][1] < rd[TX_8X8][1] &&
               rd[TX_32X32][1] < rd[TX_4X4][1];
  best_is_16 = rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1];

  if ((max_txfm_size == TX_32X32 && cm->txfm_mode == ALLOW_32X32) ||
      (cm->txfm_mode == TX_MODE_SELECT && best_is_32)) {
    mbmi->txfm_size = TX_32X32;
  } else if (cm->txfm_mode == ALLOW_16X16 ||
             (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
             (cm->txfm_mode == TX_MODE_SELECT && best_is_16)) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             (cm->txfm_mode == TX_MODE_SELECT &&
              rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  // RD cost this block would have under each frame-level transform mode.
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
  if (best_is_32) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (best_is_16) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else if (rd[TX_4X4][1] < rd[TX_8X8][1]) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
  } else {
    txfm_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
  }
}

/* Luma rate/distortion for a macroblock: subtracts the predictor from the
 * source, evaluates the 16x16, 8x8 and 4x4 transforms (in that order, each
 * on backed-up entropy contexts) and lets choose_txfm_size_from_rd() pick
 * the size and fill rate, distortion, skippable and txfm_cache. */
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int rates[TX_SIZE_MAX_MB][2];
  int dists[TX_SIZE_MAX_MB];
  int skips[TX_SIZE_MAX_MB];

  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);

  macro_block_yrd_16x16(x, &rates[TX_16X16][0], &dists[TX_16X16],
                        &skips[TX_16X16], 1);
  macro_block_yrd_8x8(x, &rates[TX_8X8][0], &dists[TX_8X8],
                      &skips[TX_8X8], 1);
  macro_block_yrd_4x4(x, &rates[TX_4X4][0], &dists[TX_4X4],
                      &skips[TX_4X4], 1);

  choose_txfm_size_from_rd(cpi, x, rates, rate, dists, distortion,
                           skips, skippable, txfm_cache, TX_16X16);
}

693
/* Copy four rows of predictor bytes into dst.
 *
 * Each row is sizeof(unsigned int) bytes wide and consecutive rows start
 * 4 * sizeof(unsigned int) bytes apart, in both buffers.  Bytes between
 * rows are left untouched.
 *
 * The copy uses memcpy rather than reading the byte buffers through
 * `unsigned int *`, so it is valid for any alignment and does not break
 * the aliasing rules. */
static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
  const size_t row_bytes = sizeof(unsigned int);
  const size_t row_pitch = 4 * sizeof(unsigned int);
  int row;

  for (row = 0; row < 4; ++row)
    memcpy(dst + row * row_pitch, predictor + row * row_pitch, row_bytes);
}
Yaowu Xu's avatar
Yaowu Xu committed
701

Ronald S. Bultje's avatar
Ronald S. Bultje committed
702
/* Token cost of the luma 32x32 transform block.  When backup is non-zero
 * the entropy contexts are copied first and only the copies are updated;
 * otherwise the live contexts are updated. */
static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;

  if (backup) {
    vpx_memcpy(&above_copy, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&left_copy, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
    ta = (ENTROPY_CONTEXT *)&above_copy;
    tl = (ENTROPY_CONTEXT *)&left_copy;
  }

  return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
}

721 722
/* Sum of squared differences between two coefficient arrays of block_size
 * entries, accumulated in 64 bits and saturated at INT_MAX. */
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                                int block_size) {
  int64_t sse = 0;
  int idx = 0;

  while (idx < block_size) {
    const int64_t diff = (int64_t)coeff[idx] - dqcoeff[idx];
    sse += diff * diff;
    ++idx;
  }

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}

/* Set to 1 to also run the inverse 32x32 transform and print the
 * round-trip error next to the reported distortion. */
#define DEBUG_ERROR 0

/* Luma rate/distortion of a superblock coded with one 32x32 transform.
 * Transforms and quantizes the superblock luma residual, then reports:
 *   *rate        token cost (see rdcost_sby_32x32 for the meaning of backup)
 *   *distortion  squared coefficient error over the 1024 coefficients,
 *                saturated at INT_MAX
 *   *skippable   result of vp9_sby_is_skippable_32x32() */
static void super_block_yrd_32x32(MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  int backup) {
  MACROBLOCKD *const xd = &x->e_mbd;
  SUPERBLOCK *const src_sb = &x->sb_coeff_data;
  SUPERBLOCKD *const rec_sb = &xd->sb_coeff_data;
#if DEBUG_ERROR
  int16_t out[1024];
#endif

  vp9_transform_sby_32x32(x);
  vp9_quantize_sby_32x32(x);
#if DEBUG_ERROR
  vp9_short_idct32x32(rec_sb->dqcoeff, out, 64);
#endif

  *distortion = vp9_sb_block_error_c(src_sb->coeff, rec_sb->dqcoeff, 1024);

#if DEBUG_ERROR
  printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
         vp9_block_error_c(src_sb->src_diff, out, 1024), *distortion);
#endif
  *rate      = rdcost_sby_32x32(x, backup);
  *skippable = vp9_sby_is_skippable_32x32(xd);
}

static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
763
                            int *skip,
764
                            int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
765
  MACROBLOCKD *const xd = &x->e_mbd;
766
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
767 768
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
769 770 771 772
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
                        *orig_left = xd->left_context;
773

774
  for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
775 776
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
777
    r[n][0] = 0;
778 779 780
    d[n] = 0;
    s[n] = 1;
  }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
781

782 783
  vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
                       dst, dst_y_stride);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
784
  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
785 786 787 788

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
789 790
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
791
    int r_tmp, d_tmp, s_tmp;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
792

793
    vp9_subtract_mby_s_c(x->src_diff,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
794 795 796 797
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);
798 799 800

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
801
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
802
    d[TX_16X16] += d_tmp;
803
    r[TX_16X16][0] += r_tmp;
804
    s[TX_16X16] = s[TX_16X16] && s_tmp;
805 806 807 808
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
809 810 811

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
812
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
813
    d[TX_4X4] += d_tmp;
814
    r[TX_4X4][0] += r_tmp;
815
    s[TX_4X4] = s[TX_4X4] && s_tmp;
816 817 818 819
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
820 821 822

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
823
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
824
    d[TX_8X8] += d_tmp;
825
    r[TX_8X8][0] += r_tmp;
826
    s[TX_8X8] = s[TX_8X8] && s_tmp;
827 828 829 830
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
831
  }
832 833 834 835 836 837 838
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);
839 840 841

  xd->above_context = orig_above;
  xd->left_context = orig_left;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
842
}

static void super_block_64_yrd(VP9_COMP *cpi,
                               MACROBLOCK *x, int *rate, int *distortion,
                               int *skip,
                               int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4],
                        *orig_left = xd->left_context;

  for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) {
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
    r[n][0] = 0;
    d[n] = 0;
    s[n] = 1;
  }

  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
    int r_tmp, d_tmp, s_tmp;

    xd->above_context = &t_above[TX_32X32][x_idx << 1];
    xd->left_context = &t_left[TX_32X32][y_idx << 1];
    vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
                         src + 32 * x_idx + 32 * y_idx * src_y_stride,
                         src_y_stride,
                         dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
                         dst_y_stride);
    super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0);
    r[TX_32X32][0] += r_tmp;
    d[TX_32X32] += d_tmp;
    s[TX_32X32] = s[TX_32X32] && s_tmp;
  }

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
  for (n = 0; n < 16; n++) {
    int x_idx = n & 3, y_idx = n >> 2;
    int r_tmp, d_tmp, s_tmp;

    vp9_subtract_mby_s_c(x->src_diff,
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_16X16] += d_tmp;
    r[TX_16X16][0] += r_tmp;
    s[TX_16X16] = s[TX_16X16] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_4X4] += d_tmp;
    r[TX_4X4][0] += r_tmp;
    s[TX_4X4] = s[TX_4X4] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_8X8] += d_tmp;
    r[TX_8X8][0] += r_tmp;
    s[TX_8X8] = s[TX_8X8] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
  }
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi,