vp9_rdopt.c 190 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
12
13
14
15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16
17
#include "vp9/common/vp9_pragmas.h"

18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
37

38
39
40
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
41
#include "vp9_rtcd.h"
42
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
43
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45
46
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

47
48
#define INVALID_MV 0x80008000

49
50
51
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
static const int auto_speed_thresh[17] = {
  1000,
  200,
  150,
  130,
  150,
  125,
  120,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  105
John Koleszar's avatar
John Koleszar committed
70
71
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73
74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76
77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79
80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85
86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88
89
90
91
92
93
94
95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99
100
101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103
104
105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

107
108
  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},
109

John Koleszar's avatar
John Koleszar committed
110
111
112
113
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115
116
117
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119
120
121
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123
124
125
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
126

John Koleszar's avatar
John Koleszar committed
127
128
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
148
149
};

150
151
152
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
                             int block_type_counts) {
153
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
154
155

  for (i = 0; i < block_type_counts; i++)
156
157
158
159
160
161
162
163
164
165
166
167
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          if (l == 0 && k > 0)
            vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
                                 p[i][j][k][l],
                                 vp9_coef_tree);
          else
            vp9_cost_tokens((int *)(c[i][j][k][l]),
                            p[i][j][k][l],
                            vp9_coef_tree);
        }
John Koleszar's avatar
John Koleszar committed
168
169
}

170

171
172
173
174
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
175

176
// 3* dc_qlookup[Q]*dc_qlookup[Q];
177

178
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
179
180
181
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

182
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
183
184
185
186
187
188
189
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
190
191
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742);
John Koleszar's avatar
John Koleszar committed
192
  }
Paul Wilkins's avatar
Paul Wilkins committed
193
}
John Koleszar's avatar
John Koleszar committed
194

195
static int compute_rd_mult(int qindex) {
John Koleszar's avatar
John Koleszar committed
196
  int q;
197

198
  q = vp9_dc_quant(qindex, 0);
John Koleszar's avatar
John Koleszar committed
199
  return (11 * q * q) >> 6;
200
201
}

202
void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
John Koleszar's avatar
John Koleszar committed
203
204
  cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
  cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
John Koleszar's avatar
John Koleszar committed
205
206
}

207

208
void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
209
  int q, i;
John Koleszar's avatar
John Koleszar committed
210

211
  vp9_clear_system_state();  // __asm emms;
John Koleszar's avatar
John Koleszar committed
212

John Koleszar's avatar
John Koleszar committed
213
214
215
216
217
  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);
218

John Koleszar's avatar
John Koleszar committed
219
  cpi->RDMULT = compute_rd_mult(QIndex);
John Koleszar's avatar
John Koleszar committed
220

John Koleszar's avatar
John Koleszar committed
221
222
223
224
225
226
227
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
        (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
John Koleszar's avatar
John Koleszar committed
228

John Koleszar's avatar
John Koleszar committed
229
230
  if (cpi->RDMULT < 7)
    cpi->RDMULT = 7;
231

John Koleszar's avatar
John Koleszar committed
232
233
  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
234

235
  vp9_set_speed_features(cpi);
John Koleszar's avatar
John Koleszar committed
236

237
  q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
John Koleszar's avatar
John Koleszar committed
238
239
  q = q << 2;
  cpi->RDMULT = cpi->RDMULT << 4;
Paul Wilkins's avatar
Paul Wilkins committed
240

John Koleszar's avatar
John Koleszar committed
241
242
  if (q < 8)
    q = 8;
243

John Koleszar's avatar
John Koleszar committed
244
245
246
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
John Koleszar's avatar
John Koleszar committed
247

John Koleszar's avatar
John Koleszar committed
248
249
250
251
252
253
    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
John Koleszar's avatar
John Koleszar committed
254

John Koleszar's avatar
John Koleszar committed
255
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
John Koleszar's avatar
John Koleszar committed
256
    }
John Koleszar's avatar
John Koleszar committed
257
258
  } else {
    cpi->RDDIV = 100;
John Koleszar's avatar
John Koleszar committed
259

John Koleszar's avatar
John Koleszar committed
260
261
262
263
264
265
    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
John Koleszar's avatar
John Koleszar committed
266

John Koleszar's avatar
John Koleszar committed
267
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
John Koleszar's avatar
John Koleszar committed
268
    }
John Koleszar's avatar
John Koleszar committed
269
  }
John Koleszar's avatar
John Koleszar committed
270

271
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
272
                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES);
273
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
274
                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES);
275
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
276
                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
277
278
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);
279

John Koleszar's avatar
John Koleszar committed
280
281
  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
282
  vp9_init_mode_costs(cpi);
John Koleszar's avatar
John Koleszar committed
283

284
  if (cpi->common.frame_type != KEY_FRAME) {
285
    vp9_build_nmv_cost_table(
286
287
288
289
290
291
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
John Koleszar's avatar
John Koleszar committed
292
293
}

294
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
295
  int i, error = 0;
John Koleszar's avatar
John Koleszar committed
296

297
  for (i = 0; i < block_size; i++) {
John Koleszar's avatar
John Koleszar committed
298
299
300
    int this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }
John Koleszar's avatar
John Koleszar committed
301

John Koleszar's avatar
John Koleszar committed
302
  return error;
John Koleszar's avatar
John Koleszar committed
303
304
}

305
int vp9_mbblock_error_c(MACROBLOCK *mb) {
John Koleszar's avatar
John Koleszar committed
306
307
308
309
  BLOCK  *be;
  BLOCKD *bd;
  int i, j;
  int berror, error = 0;
John Koleszar's avatar
John Koleszar committed
310

John Koleszar's avatar
John Koleszar committed
311
312
313
314
  for (i = 0; i < 16; i++) {
    be = &mb->block[i];
    bd = &mb->e_mbd.block[i];
    berror = 0;
315
    for (j = 0; j < 16; j++) {
John Koleszar's avatar
John Koleszar committed
316
317
      int this_diff = be->coeff[j] - bd->dqcoeff[j];
      berror += this_diff * this_diff;
John Koleszar's avatar
John Koleszar committed
318
    }
John Koleszar's avatar
John Koleszar committed
319
320
321
    error += berror;
  }
  return error;
John Koleszar's avatar
John Koleszar committed
322
323
}

324
int vp9_mbuverror_c(MACROBLOCK *mb) {
John Koleszar's avatar
John Koleszar committed
325
326
  BLOCK  *be;
  BLOCKD *bd;
John Koleszar's avatar
John Koleszar committed
327

328
  int i, error = 0;
John Koleszar's avatar
John Koleszar committed
329

John Koleszar's avatar
John Koleszar committed
330
331
332
  for (i = 16; i < 24; i++) {
    be = &mb->block[i];
    bd = &mb->e_mbd.block[i];
John Koleszar's avatar
John Koleszar committed
333

334
    error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16);
John Koleszar's avatar
John Koleszar committed
335
  }
John Koleszar's avatar
John Koleszar committed
336

John Koleszar's avatar
John Koleszar committed
337
  return error;
John Koleszar's avatar
John Koleszar committed
338
339
}

340
int vp9_uvsse(MACROBLOCK *x) {
341
342
343
  uint8_t *uptr, *vptr;
  uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
  uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
John Koleszar's avatar
John Koleszar committed
344
345
346
347
  int uv_stride = x->block[16].src_stride;

  unsigned int sse1 = 0;
  unsigned int sse2 = 0;
348
349
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row;
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col;
John Koleszar's avatar
John Koleszar committed
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
  int offset;
  int pre_stride = x->e_mbd.block[16].pre_stride;

  if (mv_row < 0)
    mv_row -= 1;
  else
    mv_row += 1;

  if (mv_col < 0)
    mv_col -= 1;
  else
    mv_col += 1;

  mv_row /= 2;
  mv_col /= 2;

  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
  uptr = x->e_mbd.pre.u_buffer + offset;
  vptr = x->e_mbd.pre.v_buffer + offset;

  if ((mv_row | mv_col) & 7) {
371
    vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
372
                              (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
373
    vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
374
                              (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
John Koleszar's avatar
John Koleszar committed
375
376
    sse2 += sse1;
  } else {
377
378
    vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
    vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
John Koleszar's avatar
John Koleszar committed
379
380
381
    sse2 += sse1;
  }
  return sse2;
John Koleszar's avatar
John Koleszar committed
382
383
384

}

385
386
387
388
389
static INLINE int cost_coeffs(MACROBLOCK *mb,
                              BLOCKD *b, PLANE_TYPE type,
                              ENTROPY_CONTEXT *a,
                              ENTROPY_CONTEXT *l,
                              TX_SIZE tx_size) {
390
  int pt;
Daniel Kang's avatar
Daniel Kang committed
391
  const int eob = b->eob;
392
  MACROBLOCKD *xd = &mb->e_mbd;
393
  const int ib = (int)(b - xd->block);
394
  int c = 0;
395
396
  int cost = 0, seg_eob;
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
397
  const int *scan;
398
  int16_t *qcoeff_ptr = b->qcoeff;
399
  const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
400
401
  const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                          get_tx_type(xd, b) : DCT_DCT;
402
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
403
      mb->token_costs[tx_size][type][ref];
404
  ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
405

406
  switch (tx_size) {
Daniel Kang's avatar
Daniel Kang committed
407
    case TX_4X4:
408
409
      scan = vp9_default_zig_zag1d_4x4;
      seg_eob = 16;
Deb Mukherjee's avatar
Deb Mukherjee committed
410
      if (type == PLANE_TYPE_Y_WITH_DC) {
411
412
413
414
        if (tx_type == ADST_DCT) {
          scan = vp9_row_scan_4x4;
        } else if (tx_type == DCT_ADST) {
          scan = vp9_col_scan_4x4;
415
        }
Daniel Kang's avatar
Daniel Kang committed
416
417
418
      }
      break;
    case TX_8X8:
419
420
      scan = vp9_default_zig_zag1d_8x8;
      seg_eob = 64;
Daniel Kang's avatar
Daniel Kang committed
421
422
      break;
    case TX_16X16:
423
      scan = vp9_default_zig_zag1d_16x16;
424
425
426
427
      seg_eob = 256;
      if (type == PLANE_TYPE_UV) {
        const int uv_idx = ib - 16;
        qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
Deb Mukherjee's avatar
Deb Mukherjee committed
428
      }
Daniel Kang's avatar
Daniel Kang committed
429
      break;
430
431
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
432
      seg_eob = 1024;
433
434
      qcoeff_ptr = xd->sb_coeff_data.qcoeff;
      break;
Daniel Kang's avatar
Daniel Kang committed
435
    default:
436
      abort();
Daniel Kang's avatar
Daniel Kang committed
437
438
      break;
  }
439

440
441
  VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);

Paul Wilkins's avatar
Paul Wilkins committed
442
443
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
444

445
  {
446
    int recent_energy = 0;
447
448
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
449
      int t = vp9_dct_value_tokens_ptr[v].Token;
450
      cost += token_costs[get_coef_band(tx_size, c)][pt][t];
451
      cost += vp9_dct_value_cost_ptr[v];
452
      pt = vp9_get_coef_context(&recent_energy, t);
453
454
    }
    if (c < seg_eob)
455
      cost += mb->token_costs[tx_size][type][ref][get_coef_band(tx_size, c)]
456
          [pt][DCT_EOB_TOKEN];
457
458
  }

459
  // is eob first coefficient;
460
  pt = (c > 0);
461
462
463
464
  *a = *l = pt;
  return cost;
}

465
static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) {
John Koleszar's avatar
John Koleszar committed
466
467
  int cost = 0;
  int b;
Paul Wilkins's avatar
Paul Wilkins committed
468
  MACROBLOCKD *xd = &mb->e_mbd;
John Koleszar's avatar
John Koleszar committed
469
470
471
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;
472

473
474
475
  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
John Koleszar's avatar
John Koleszar committed
476

477
478
479
480
481
482
    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }
John Koleszar's avatar
John Koleszar committed
483

John Koleszar's avatar
John Koleszar committed
484
  for (b = 0; b < 16; b++)
485
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
Yaowu Xu's avatar
Yaowu Xu committed
486
487
                        ta + vp9_block2above[TX_4X4][b],
                        tl + vp9_block2left[TX_4X4][b],
Daniel Kang's avatar
Daniel Kang committed
488
                        TX_4X4);
John Koleszar's avatar
John Koleszar committed
489

John Koleszar's avatar
John Koleszar committed
490
  return cost;
John Koleszar's avatar
John Koleszar committed
491
492
}

493
494
495
static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
496
                                int *skippable, int backup) {
Paul Wilkins's avatar
Paul Wilkins committed
497
  MACROBLOCKD *const xd = &mb->e_mbd;
498

499
500
501
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_mby_4x4(mb);
  vp9_quantize_mby_4x4(mb);
502

503
504
505
  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_4x4(mb, backup);
  *skippable = vp9_mby_is_skippable_4x4(xd);
506
}
John Koleszar's avatar
John Koleszar committed
507

508
static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
John Koleszar's avatar
John Koleszar committed
509
510
  int cost = 0;
  int b;
Paul Wilkins's avatar
Paul Wilkins committed
511
  MACROBLOCKD *xd = &mb->e_mbd;
John Koleszar's avatar
John Koleszar committed
512
513
514
515
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

Ronald S. Bultje's avatar
Ronald S. Bultje committed
516
517
518
  if (backup) {
    vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
John Koleszar's avatar
John Koleszar committed
519

Ronald S. Bultje's avatar
Ronald S. Bultje committed
520
521
522
523
524
525
    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
    tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
  }
John Koleszar's avatar
John Koleszar committed
526
527

  for (b = 0; b < 16; b += 4)
528
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
Yaowu Xu's avatar
Yaowu Xu committed
529
530
                        ta + vp9_block2above[TX_8X8][b],
                        tl + vp9_block2left[TX_8X8][b],
Daniel Kang's avatar
Daniel Kang committed
531
                        TX_8X8);
John Koleszar's avatar
John Koleszar committed
532
533

  return cost;
534
535
}

John Koleszar's avatar
John Koleszar committed
536
537
538
static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
539
                                int *skippable, int backup) {
Paul Wilkins's avatar
Paul Wilkins committed
540
  MACROBLOCKD *const xd = &mb->e_mbd;
541
542

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
543
544
  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);
John Koleszar's avatar
John Koleszar committed
545

546
547
548
  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_8x8(mb, backup);
  *skippable = vp9_mby_is_skippable_8x8(xd);
549
}
550

551
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
Daniel Kang's avatar
Daniel Kang committed
552
  int cost;
Paul Wilkins's avatar
Paul Wilkins committed
553
  MACROBLOCKD *xd = &mb->e_mbd;
Daniel Kang's avatar
Daniel Kang committed
554
555
556
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;

557
558
559
  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
Daniel Kang's avatar
Daniel Kang committed
560

561
562
563
564
565
566
    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }
Daniel Kang's avatar
Daniel Kang committed
567

Paul Wilkins's avatar
Paul Wilkins committed
568
  cost = cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
Daniel Kang's avatar
Daniel Kang committed
569
570
  return cost;
}
571

Daniel Kang's avatar
Daniel Kang committed
572
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
573
                                  int *skippable, int backup) {
Deb Mukherjee's avatar
Deb Mukherjee committed
574
  MACROBLOCKD *xd = &mb->e_mbd;
Daniel Kang's avatar
Daniel Kang committed
575

576
577
  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_mby_16x16(mb);
578
  vp9_quantize_mby_16x16(mb);
579
580
581
  // TODO(jingning) is it possible to quickly determine whether to force
  //                trailing coefficients to be zero, instead of running trellis
  //                optimization in the rate-distortion optimization loop?
582
583
  if (mb->optimize &&
      xd->mode_info_context->mbmi.mode < I8X8_PRED)
584
    vp9_optimize_mby_16x16(mb);
585

586
  *Distortion = vp9_mbblock_error(mb) >> 2;
587
  *Rate = rdcost_mby_16x16(mb, backup);
588
  *skippable = vp9_mby_is_skippable_16x16(xd);
Daniel Kang's avatar
Daniel Kang committed
589
590
}

591
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
592
593
594
595
596
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
597
598
599
600
601
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
602
603
604
605
606
607
608
609
610
611
612
613
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }
614
615
616
617
618
619
620
621

  if (cm->mb_no_coeff_skip) {
    int s0, s1;

    assert(skip_prob > 0);
    s0 = vp9_cost_bit(skip_prob, 0);
    s1 = vp9_cost_bit(skip_prob, 1);

622
    for (n = TX_4X4; n <= max_txfm_size; n++) {
623
      if (s[n]) {
624
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
625
      } else {
626
627
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
628
      }
629
630
    }
  } else {
631
632
633
    for (n = TX_4X4; n <= max_txfm_size; n++) {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]);
634
635
636
    }
  }

637
638
639
640
641
642
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
643
644
645
646
647
  } else if ( cm->txfm_mode == ALLOW_16X16 ||
             (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
             (cm->txfm_mode == TX_MODE_SELECT &&
              rd[TX_16X16][1] < rd[TX_8X8][1] &&
              rd[TX_16X16][1] < rd[TX_4X4][1])) {
648
    mbmi->txfm_size = TX_16X16;
649
  } else if (cm->txfm_mode == ALLOW_8X8 ||
650
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
651
652
    mbmi->txfm_size = TX_8X8;
  } else {
653
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
654
655
656
    mbmi->txfm_size = TX_4X4;
  }

657
  *distortion = d[mbmi->txfm_size];
658
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
659
660
  *skip       = s[mbmi->txfm_size];

661
662
663
664
665
666
667
668
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
669
  else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
670
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
671
  else
672
673
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
674
675
676
677
678
679
}

static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
680
  int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB];
681
682
683

  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);
684

685
686
687
  macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1);
  macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1);
  macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1);
688
689

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
690
                           txfm_cache, TX_16X16);
691
692
}

693
static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
John Koleszar's avatar
John Koleszar committed
694
695
696
697
698
699
  const unsigned int *p = (const unsigned int *)predictor;
  unsigned int *d = (unsigned int *)dst;
  d[0] = p[0];
  d[4] = p[4];
  d[8] = p[8];
  d[12] = p[12];
700
}
Yaowu Xu's avatar
Yaowu Xu committed
701

Ronald S. Bultje's avatar
Ronald S. Bultje committed
702
static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
703
704
  MACROBLOCKD * const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
705
  ENTROPY_CONTEXT *ta, *tl;
706

Ronald S. Bultje's avatar
Ronald S. Bultje committed
707
708
709
710
711
712
713
714
715
716
  if (backup) {
    ta = (ENTROPY_CONTEXT *) &t_above,
    tl = (ENTROPY_CONTEXT *) &t_left;

    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left,  xd->left_context,  sizeof(ENTROPY_CONTEXT_PLANES));
  } else {
    ta = (ENTROPY_CONTEXT *) xd->above_context;
    tl = (ENTROPY_CONTEXT *) xd->left_context;
  }
717
718
719
720

  return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
}

721
722
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                                int block_size) {
723
724
725
726
727
728
729
730
731
732
733
734
735
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    unsigned int this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : error;
}

#define DEBUG_ERROR 0
static void super_block_yrd_32x32(MACROBLOCK *x,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
736
737
                                  int *rate, int *distortion, int *skippable,
                                  int backup) {
738
739
740
  SUPERBLOCK  * const x_sb = &x->sb_coeff_data;
  MACROBLOCKD * const xd = &x->e_mbd;
  SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data;
741
#if DEBUG_ERROR
742
  int16_t out[1024];
743
744
745
746
#endif

  vp9_transform_sby_32x32(x);
  vp9_quantize_sby_32x32(x);
747
#if DEBUG_ERROR
748
749
750
751
  vp9_short_idct32x32(xd_sb->dqcoeff, out, 64);
#endif

  *distortion = vp9_sb_block_error_c(x_sb->coeff, xd_sb->dqcoeff, 1024);
752

753
754
755
756
#if DEBUG_ERROR
  printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
         vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion);
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
757
  *rate       = rdcost_sby_32x32(x, backup);
758
759
760
  *skippable  = vp9_sby_is_skippable_32x32(&x->e_mbd);
}

761
762
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
763
                            int *skip,
764
                            int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
765
  MACROBLOCKD *const xd = &x->e_mbd;
766
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
767
768
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
769
770
771
772
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
                        *orig_left = xd->left_context;
773

774
  for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
775
776
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
777
    r[n][0] = 0;
778
779
780
    d[n] = 0;
    s[n] = 1;
  }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
781

782
783
  vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
                       dst, dst_y_stride);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
784
  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
785
786
787
788

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
789
790
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
791
    int r_tmp, d_tmp, s_tmp;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
792

793
    vp9_subtract_mby_s_c(x->src_diff,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
794
795
796
797
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);
798
799
800

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
801
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
802
    d[TX_16X16] += d_tmp;
803
    r[TX_16X16][0] += r_tmp;
804
    s[TX_16X16] = s[TX_16X16] && s_tmp;
805
806
807
808
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
809
810
811

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
812
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
813
    d[TX_4X4] += d_tmp;
814
    r[TX_4X4][0] += r_tmp;
815
    s[TX_4X4] = s[TX_4X4] && s_tmp;
816
817
818
819
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
820
821
822

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
823
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
824
    d[TX_8X8] += d_tmp;
825
    r[TX_8X8][0] += r_tmp;
826
    s[TX_8X8] = s[TX_8X8] && s_tmp;
827
828
829
830
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
831
  }
832
833
834
835
836
837
838
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);
839
840
841

  xd->above_context = orig_above;
  xd->left_context = orig_left;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
842
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938

static void super_block_64_yrd(VP9_COMP *cpi,
                               MACROBLOCK *x, int *rate, int *distortion,
                               int *skip,
                               int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4],
                        *orig_left = xd->left_context;

  for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) {
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
    r[n][0] = 0;
    d[n] = 0;
    s[n] = 1;
  }

  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
    int r_tmp, d_tmp, s_tmp;

    xd->above_context = &t_above[TX_32X32][x_idx << 1];
    xd->left_context = &t_left[TX_32X32][y_idx << 1];
    vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
                         src + 32 * x_idx + 32 * y_idx * src_y_stride,
                         src_y_stride,
                         dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
                         dst_y_stride);
    super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0);
    r[TX_32X32][0] += r_tmp;
    d[TX_32X32] += d_tmp;
    s[TX_32X32] = s[TX_32X32] && s_tmp;
  }

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
  for (n = 0; n < 16; n++) {
    int x_idx = n & 3, y_idx = n >> 2;
    int r_tmp, d_tmp, s_tmp;

    vp9_subtract_mby_s_c(x->src_diff,
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_16X16] += d_tmp;
    r[TX_16X16][0] += r_tmp;
    s[TX_16X16] = s[TX_16X16] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_4X4] += d_tmp;
    r[TX_4X4][0] += r_tmp;
    s[TX_4X4] = s[TX_4X4] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_8X8] += d_tmp;
    r[TX_8X8][0] += r_tmp;
    s[TX_8X8] = s[TX_8X8] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
  }
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);

  xd->above_context = orig_above;
  xd->left_context = orig_left;
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
939

940
static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
John Koleszar's avatar
John Koleszar committed
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
  const unsigned int *p = (const unsigned int *)predictor;
  unsigned int *d = (unsigned int *)dst;
  d[0] = p[0];
  d[1] = p[1];
  d[4] = p[4];
  d[5] = p[5];
  d[8] = p[8];
  d[9] = p[9];
  d[12] = p[12];
  d[13] = p[13];
  d[16] = p[16];
  d[17] = p[17];
  d[20] = p[20];
  d[21] = p[21];
  d[24] = p[24];
  d[25] = p[25];
  d[28] = p[28];
  d[29] = p[29];
Yaowu Xu's avatar
Yaowu Xu committed
959
960
}

961
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
962
963
964
965
966
                                     BLOCKD *b, B_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
967
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
968
  MACROBLOCKD *xd = &x->e_mbd;
969
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
970
971
972
973
974
  int rate = 0;
  int distortion;

  ENTROPY_CONTEXT ta = *a, tempa = *a;
  ENTROPY_CONTEXT tl = *l, templ = *l;
Deb Mukherjee's avatar
Deb Mukherjee committed
975
976
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
John Koleszar's avatar
John Koleszar committed
977