/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
#include <string.h>

#include "vp9/common/vp9_pragmas.h"

#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"

#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9_rtcd.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45
46
/* Evaluates to the larger of a and b.  Each argument is expanded twice, so
 * do not pass expressions with side effects. */
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

47
48
/* Marker value whose upper and lower 16-bit halves are both 0x8000.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * a packed row/col motion vector sentinel -- confirm against its users. */
#define INVALID_MV 0x80008000

49
50
51
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/* Table of 17 threshold values.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * indexed by encoder speed setting -- confirm against its users. */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, 150, 125, 120, 115,
  115,  115, 115, 115, 115, 115, 115, 115,
  105
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73
74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76
77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79
80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85
86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88
89
90
91
92
93
94
95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99
100
101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103
104
105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

107
108
  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},
109

John Koleszar's avatar
John Koleszar committed
110
111
112
113
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115
116
117
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119
120
121
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123
124
125
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
126

John Koleszar's avatar
John Koleszar committed
127
128
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
148
149
};

150
151
152
/* Fills the cost table c from the probability table p for every
 * (block type, coefficient band, previous-token context) triple, walking
 * block_type_counts block types.
 *
 * Context 0 of every band above band 0 is built with
 * vp9_cost_tokens_skip(); all other slots use vp9_cost_tokens(). */
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
                             int block_type_counts) {
  int type, band, ctx;

  for (type = 0; type < block_type_counts; ++type) {
    for (band = 0; band < COEF_BANDS; ++band) {
      for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ++ctx) {
        const int use_skip_variant = (ctx == 0 && band > 0);

        if (use_skip_variant) {
          vp9_cost_tokens_skip((int *)(c[type][band][ctx]),
                               p[type][band][ctx],
                               vp9_coef_tree);
        } else {
          vp9_cost_tokens((int *)(c[type][band][ctx]),
                          p[type][band][ctx],
                          vp9_coef_tree);
        }
      }
    }
  }
}

169

170
171
172
173
/* Extra RDMULT weighting in units of 1/16, applied by
 * vp9_initialize_rd_consts() on non-key frames in pass 2.  Indexed by
 * cpi->twopass.next_iiratio, which that function clamps to 31 from above.
 * Only indices 0..4 carry a non-zero weight. */
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
174

175
// 3* dc_qlookup[Q]*dc_qlookup[Q];
176

177
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
178
179
180
/* SAD-per-bit lookup tables, one entry per quantizer index.  Filled by
 * vp9_init_me_luts() and read by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

181
/* Populates sad_per_bit16lut[] and sad_per_bit4lut[] for every quantizer
 * index in [0, QINDEX_RANGE).
 *
 * The tables are derived with a linear formula of the real quantizer value
 * rather than hand-tuned constants, which makes it easier to judge the
 * impact of experimental changes to the quantizer tables. */
void vp9_init_me_luts() {
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[qindex] = (int)((0.063 * q) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
193

194
/* Returns the base rate-distortion multiplier for quantizer index qindex:
 * (11 * q * q) >> 6, where q is vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);

  return (11 * dc_q * dc_q) >> 6;
}

201
/* Copies the SAD-per-bit values for quantizer index QIndex from the lookup
 * tables into cpi->mb.  QIndex is used as a table index without range
 * checking, so the caller must pass a value in [0, QINDEX_RANGE). */
void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
  const int per_bit_16 = sad_per_bit16lut[QIndex];
  const int per_bit_4 = sad_per_bit4lut[QIndex];

  cpi->mb.sadperbit16 = per_bit_16;
  cpi->mb.sadperbit4 = per_bit_4;
}

206

207
/* Sets up the rate-distortion constants of cpi for quantizer index QIndex.
 *
 * In order, this:
 *   - clamps QIndex to [0, MAXQ] and derives cpi->RDMULT from it, scaling it
 *     further for zbin_over_quant and (pass 2, non-key frames) for
 *     twopass.next_iiratio;
 *   - derives cpi->mb.errorperbit from RDMULT;
 *   - calls vp9_set_speed_features() and converts sf.thresh_mult[] into
 *     cpi->rd_threshes[] / cpi->rd_baseline_thresh[];
 *   - rebuilds the coefficient token cost tables, the mode costs and, for
 *     non-key frames, the motion vector cost table.
 *
 * NOTE(review): twopass.next_iiratio is only clamped from above before it
 * indexes rd_iifactor[]; it is assumed to be non-negative. */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);

  cpi->RDMULT = compute_rd_mult(QIndex);

  // Extend rate multiplier along side quantizer zbin increases
  if (cpi->zbin_over_quant  > 0) {
    double oq_factor;

    // Experimental code using the same basic equation as used for Q above
    // The units of cpi->zbin_over_quant are 1/128 of Q bin size
    oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
    cpi->RDMULT = (int)((double)cpi->RDMULT * oq_factor * oq_factor);
  }

  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
        (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }

  if (cpi->RDMULT < 7)
    cpi->RDMULT = 7;

  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
  q = q << 2;
  cpi->RDMULT = cpi->RDMULT << 4;

  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Widen before multiplying: thresh_mult * q can exceed INT_MAX even
        // when thresh_mult itself does not, which would overflow an int.
        const int64_t thresh = (int64_t)cpi->sf.thresh_mult[i] * q / 100;

        cpi->rd_threshes[i] = (thresh > INT_MAX) ? INT_MAX : (int)thresh;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // q >= 8 here, so the division is safe and guards the product below.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES_4X4);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_4X4],
                   cpi->common.fc.hybrid_coef_probs_4x4,
                   BLOCK_TYPES_4X4_HYBRID);

  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES_8X8);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_8X8],
                   cpi->common.fc.hybrid_coef_probs_8x8,
                   BLOCK_TYPES_8X8_HYBRID);

  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES_16X16);
  fill_token_costs(cpi->mb.hybrid_token_costs[TX_16X16],
                   cpi->common.fc.hybrid_coef_probs_16x16,
                   BLOCK_TYPES_16X16_HYBRID);

  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

315
/* Returns the sum of squared differences between the first block_size
 * entries of coeff and dqcoeff.
 *
 * A single squared difference of two int16_t values can reach 65535^2,
 * which does not fit in an int, so the sum is accumulated in 64 bits and
 * saturated to INT_MAX on return (the same convention used by
 * vp9_sb_block_error_c()).  Results that fit in an int are unchanged.
 * Returns 0 when block_size <= 0. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

326
/* Returns the sum of squared differences between coeff and dqcoeff over
 * blocks 0..15 of the macroblock, 16 coefficients per block. */
int vp9_mbblock_error_c(MACROBLOCK *mb) {
  int total = 0;
  int blk;

  for (blk = 0; blk < 16; ++blk) {
    BLOCK *src = &mb->block[blk];
    BLOCKD *rec = &mb->e_mbd.block[blk];

    total += vp9_block_error_c(src->coeff, rec->dqcoeff, 16);
  }

  return total;
}

345
/* Returns the sum of squared differences between coeff and dqcoeff over
 * blocks 16..23 of the macroblock, 16 coefficients per block. */
int vp9_mbuverror_c(MACROBLOCK *mb) {
  int total = 0;
  int blk;

  for (blk = 16; blk < 24; ++blk) {
    total += vp9_block_error_c(mb->block[blk].coeff,
                               mb->e_mbd.block[blk].dqcoeff, 16);
  }

  return total;
}

361
/* Returns the combined sum of squared errors of the two 8x8 chroma blocks
 * (source blocks 16 and 20) against the reference planes xd->pre.u_buffer
 * and xd->pre.v_buffer, displaced by the macroblock's first motion vector.
 *
 * The motion vector components are rounded away from zero and halved before
 * use.  When either halved component has a fractional part (low three bits
 * set) the sub-pixel variance function is used, otherwise the whole-pixel
 * one. */
int vp9_uvsse(MACROBLOCK *x) {
  uint8_t *const usrc = (*(x->block[16].base_src) + x->block[16].src);
  uint8_t *const vsrc = (*(x->block[20].base_src) + x->block[20].src);
  const int src_stride = x->block[16].src_stride;
  const int ref_stride = x->e_mbd.block[16].pre_stride;
  int row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row;
  int col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col;
  unsigned int sse_u = 0;
  unsigned int sse_v = 0;
  uint8_t *uref, *vref;
  int ref_offset;

  // Round away from zero, then halve.
  row = (row + ((row < 0) ? -1 : 1)) / 2;
  col = (col + ((col < 0) ? -1 : 1)) / 2;

  ref_offset = (row >> 3) * ref_stride + (col >> 3);
  uref = x->e_mbd.pre.u_buffer + ref_offset;
  vref = x->e_mbd.pre.v_buffer + ref_offset;

  if ((row | col) & 7) {
    const int xoff = (col & 7) << 1;
    const int yoff = (row & 7) << 1;

    vp9_sub_pixel_variance8x8(uref, ref_stride, xoff, yoff,
                              usrc, src_stride, &sse_u);
    vp9_sub_pixel_variance8x8(vref, ref_stride, xoff, yoff,
                              vsrc, src_stride, &sse_v);
  } else {
    vp9_variance8x8(uref, ref_stride, usrc, src_stride, &sse_u);
    vp9_variance8x8(vref, ref_stride, vsrc, src_stride, &sse_v);
  }

  sse_u += sse_v;
  return sse_u;
}

406
407
408
409
410
/* Returns the token cost of the quantized coefficients of block b for the
 * given plane type and transform size.
 *
 * a and l point at the above/left entropy context entries for the block.
 * They are combined to form the starting token context and, on return, both
 * are overwritten with 1 if the block has at least one coded coefficient
 * and 0 otherwise.
 *
 * The hybrid cost table is used when the block's transform type is not
 * DCT_DCT (only possible for PLANE_TYPE_Y_WITH_DC); the regular table is
 * used otherwise.  An end-of-block token is charged only when the block
 * stops before seg_eob, which is forced to 0 when the segment has the
 * SEG_LVL_SKIP feature active. */
static INLINE int cost_coeffs(MACROBLOCK *mb,
                              BLOCKD *b, PLANE_TYPE type,
                              ENTROPY_CONTEXT *a,
                              ENTROPY_CONTEXT *l,
                              TX_SIZE tx_size) {
  int pt;
  const int eob = b->eob;
  MACROBLOCKD *xd = &mb->e_mbd;
  const int ib = (int)(b - xd->block);
  int c;
  int cost = 0, seg_eob;
  int recent_energy = 0;
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
  const int *scan;
  int16_t *qcoeff_ptr = b->qcoeff;
  const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                          get_tx_type(xd, b) : DCT_DCT;
  // Already points at the hybrid or the regular table, so a single costing
  // loop below serves both transform families.
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      (tx_type == DCT_DCT) ? mb->token_costs[tx_size][type] :
                             mb->hybrid_token_costs[tx_size][type];
  ENTROPY_CONTEXT a_ec = *a, l_ec = *l;

  switch (tx_size) {
    case TX_4X4:
      scan = vp9_default_zig_zag1d_4x4;
      seg_eob = 16;
      if (type == PLANE_TYPE_Y_WITH_DC) {
        if (tx_type == ADST_DCT) {
          scan = vp9_row_scan_4x4;
        } else if (tx_type == DCT_ADST) {
          scan = vp9_col_scan_4x4;
        }
      }
      break;
    case TX_8X8:
      scan = vp9_default_zig_zag1d_8x8;
      seg_eob = 64;
      break;
    case TX_16X16:
      scan = vp9_default_zig_zag1d_16x16;
      seg_eob = 256;
      if (type == PLANE_TYPE_UV) {
        // Chroma 16x16 coefficients are read from the superblock buffer.
        const int uv_idx = ib - 16;
        qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
      }
      break;
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
      seg_eob = 1024;
      qcoeff_ptr = xd->sb_coeff_data.qcoeff;
      break;
    default:
      abort();
      break;
  }

  VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);

  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;

  for (c = 0; c < eob; c++) {
    const int v = qcoeff_ptr[scan[c]];
    const int t = vp9_dct_value_tokens_ptr[v].Token;

    cost += token_costs[get_coef_band(c)][pt][t];
    cost += vp9_dct_value_cost_ptr[v];
    pt = vp9_get_coef_context(&recent_energy, t);
  }

  if (c < seg_eob)
    cost += token_costs[get_coef_band(c)][pt][DCT_EOB_TOKEN];

  // is eob first coefficient;
  pt = (c > 0);
  *a = *l = pt;
  return cost;
}

498
/* Returns the coefficient token cost of the 16 luma 4x4 blocks.
 *
 * When backup is non-zero the costing runs on local copies of the
 * above/left entropy contexts, leaving the real ones untouched; otherwise
 * the real contexts are updated in place by cost_coeffs(). */
static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;
  int cost = 0;
  int b;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  }

  for (b = 0; b < 16; b++) {
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
                        ta + vp9_block2above[TX_4X4][b],
                        tl + vp9_block2left[TX_4X4][b],
                        TX_4X4);
  }

  return cost;
}

526
527
528
/* Evaluates the luma plane with the 4x4 transform.
 *
 * Sets mbmi.txfm_size to TX_4X4, transforms and quantizes the luma residual,
 * then reports the distortion (block error >> 2), the token rate and
 * whether the result is skippable.  backup is forwarded to
 * rdcost_mby_4x4(). */
static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;

  vp9_transform_mby_4x4(mb);
  vp9_quantize_mby_4x4(mb);

  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_4x4(mb, backup);
  *skippable = vp9_mby_is_skippable_4x4(xd);
}
John Koleszar's avatar
John Koleszar committed
540

541
/* Returns the coefficient token cost of the four luma 8x8 blocks, which
 * start at block indices 0, 4, 8 and 12.
 *
 * When backup is non-zero the costing runs on local copies of the
 * above/left entropy contexts, leaving the real ones untouched; otherwise
 * the real contexts are updated in place by cost_coeffs(). */
static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
  int cost = 0;
  int b;

  if (backup) {
    vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  }

  for (b = 0; b < 16; b += 4) {
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
                        ta + vp9_block2above[TX_8X8][b],
                        tl + vp9_block2left[TX_8X8][b],
                        TX_8X8);
  }

  return cost;
}

John Koleszar's avatar
John Koleszar committed
569
570
571
/* Evaluates the luma plane with the 8x8 transform.
 *
 * Sets mbmi.txfm_size to TX_8X8, transforms and quantizes the luma residual,
 * then reports the distortion (block error >> 2), the token rate and
 * whether the result is skippable.  backup is forwarded to
 * rdcost_mby_8x8(). */
static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;

  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);

  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_8x8(mb, backup);
  *skippable = vp9_mby_is_skippable_8x8(xd);
}
583

584
/* Returns the coefficient token cost of the single luma 16x16 block.
 *
 * When backup is non-zero the costing runs on local copies of the
 * above/left entropy contexts, leaving the real ones untouched; otherwise
 * the real contexts are updated in place by cost_coeffs(). */
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *)xd->above_context;
  ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *)xd->left_context;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  }

  return cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
}
604

Daniel Kang's avatar
Daniel Kang committed
605
/* Evaluates the luma plane with the 16x16 transform.
 *
 * Sets mbmi.txfm_size to TX_16X16, transforms and quantizes the luma
 * residual, optionally runs vp9_optimize_mby_16x16(), then reports the
 * distortion (block error >> 2), the token rate and whether the result is
 * skippable.  backup is forwarded to rdcost_mby_16x16(). */
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
                                  int *skippable, int backup) {
  MACROBLOCKD *xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;

  vp9_transform_mby_16x16(mb);
  vp9_quantize_mby_16x16(mb);

  // TODO(jingning) is it possible to quickly determine whether to force
  //                trailing coefficients to be zero, instead of running trellis
  //                optimization in the rate-distortion optimization loop?
  if (mb->optimize) {
    if (xd->mode_info_context->mbmi.mode < I8X8_PRED)
      vp9_optimize_mby_16x16(mb);
  }

  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_16x16(mb, backup);
  *skippable = vp9_mby_is_skippable_16x16(xd);
}

624
/* Picks the transform size for the current block from per-size RD results.
 *
 * Inputs, each indexed by transform size from TX_4X4 to max_txfm_size:
 *   r[n][0]  token rate for size n (in); r[n][1] is filled here with that
 *            rate plus the cost of signalling size n via cm->prob_tx[].
 *   d[n]     distortion for size n.
 *   s[n]     non-zero when size n is skippable.
 *
 * Outputs:
 *   mbmi->txfm_size          the chosen size, governed by cm->txfm_mode
 *   *rate/*distortion/*skip  the figures of the chosen size; the rate
 *                            includes the size-signalling cost only when
 *                            cm->txfm_mode is TX_MODE_SELECT
 *   txfm_cache[]             RD cost that each transform mode would yield
 *
 * rd[TX_32X32] is only populated, and only read, when max_txfm_size is
 * TX_32X32. */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
  int64_t rd[TX_SIZE_MAX_SB][2];
  int s0 = 0, s1 = 0;
  int best_is_32x32, best_is_16x16, prefer_8x8;
  int n, m;

  // Rate including the bits that signal transform size n.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    const int last_bit = n - (n == max_txfm_size);

    r[n][1] = r[n][0];
    for (m = 0; m <= last_bit; m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  if (cm->mb_no_coeff_skip) {
    assert(skip_prob > 0);
    s0 = vp9_cost_bit(skip_prob, 0);
    s1 = vp9_cost_bit(skip_prob, 1);
  }

  // RD cost per size, without ([0]) and with ([1]) the size-signalling rate.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (cm->mb_no_coeff_skip && s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  best_is_32x32 = max_txfm_size == TX_32X32 &&
                  rd[TX_32X32][1] < rd[TX_16X16][1] &&
                  rd[TX_32X32][1] < rd[TX_8X8][1] &&
                  rd[TX_32X32][1] < rd[TX_4X4][1];
  best_is_16x16 = rd[TX_16X16][1] < rd[TX_8X8][1] &&
                  rd[TX_16X16][1] < rd[TX_4X4][1];
  prefer_8x8 = rd[TX_8X8][1] < rd[TX_4X4][1];

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT && best_is_32x32))) {
    mbmi->txfm_size = TX_32X32;
  } else if (cm->txfm_mode == ALLOW_16X16 ||
             (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
             (cm->txfm_mode == TX_MODE_SELECT && best_is_16x16)) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             (cm->txfm_mode == TX_MODE_SELECT && prefer_8x8)) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];

  if (best_is_32x32) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (best_is_16x16) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else {
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
  }
}

/* Computes luma rate, distortion and skippability for a macroblock.
 *
 * Subtracts the predictor from the source luma, evaluates the 16x16, 8x8
 * and 4x4 transforms in that order (each on backed-up entropy contexts),
 * and lets choose_txfm_size_from_rd() pick among them with TX_16X16 as the
 * largest allowed size.  txfm_cache receives the per-transform-mode RD
 * costs. */
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_MB][2];
  int d[TX_SIZE_MAX_MB];
  int s[TX_SIZE_MAX_MB];

  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);

  // Largest transform first; the order is kept as each call rewrites the
  // macroblock's transform state.
  macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1);
  macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1);
  macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1);

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
                           txfm_cache, TX_16X16);
}

726
/*
 * Copies a 4x4 block of bytes from predictor to dst.
 *
 * Both buffers are addressed with a fixed row pitch of 16 bytes; only the
 * first 4 bytes of each of the 4 rows are written, the rest of dst is left
 * untouched.
 *
 * The copy is done byte by byte rather than through unsigned int pointers:
 * casting uint8_t buffers to a wider type breaks the strict-aliasing rule,
 * requires the buffers to be suitably aligned, and ties the amount copied to
 * sizeof(unsigned int).
 */
static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
  int row, col;

  for (row = 0; row < 4; row++) {
    uint8_t *const out = dst + row * 16;
    const uint8_t *const in = predictor + row * 16;

    for (col = 0; col < 4; col++) {
      out[col] = in[col];
    }
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
734

Ronald S. Bultje's avatar
Ronald S. Bultje committed
735
/*
 * Returns the cost_coeffs() result for the 32x32 luma transform of x.
 *
 * backup != 0: cost_coeffs() is given pointers to local copies of
 *              xd->above_context / xd->left_context, so the contexts
 *              referenced by xd are not the ones passed down.
 * backup == 0: cost_coeffs() is given pointers to the contexts referenced by
 *              xd directly (presumably so that any update it makes is kept;
 *              confirm against cost_coeffs()).
 */
static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
  MACROBLOCKD * const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;

  if (backup) {
    /* Two separate statements; the original joined them with a stray comma. */
    ta = (ENTROPY_CONTEXT *) &t_above;
    tl = (ENTROPY_CONTEXT *) &t_left;

    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left,  xd->left_context,  sizeof(ENTROPY_CONTEXT_PLANES));
  } else {
    ta = (ENTROPY_CONTEXT *) xd->above_context;
    tl = (ENTROPY_CONTEXT *) xd->left_context;
  }

  return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
}

754
755
/*
 * Sum of squared differences between coeff[] and dqcoeff[] over the first
 * block_size entries.
 *
 * The sum is accumulated in 64 bits and the result is clamped to INT_MAX so
 * it fits the int return type. A block_size of zero (or less) yields 0.
 */
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                                int block_size) {
  int64_t sse = 0;
  int idx = 0;

  while (idx < block_size) {
    /* |delta| <= 65535, so delta * delta is exact in 64-bit arithmetic. */
    const int64_t delta = (int64_t)coeff[idx] - (int64_t)dqcoeff[idx];
    sse += delta * delta;
    ++idx;
  }

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}

#define DEBUG_ERROR 0
/*
 * Rate/distortion/skip measurement for the 32x32 luma transform.
 *
 * Runs the 32x32 forward transform and quantizer on x, then writes:
 *   *distortion  vp9_sb_block_error_c() between the transform coefficients
 *                and the dequantized coefficients (1024 entries),
 *   *rate        rdcost_sby_32x32(x, backup),
 *   *skippable   vp9_sby_is_skippable_32x32() for this block.
 *
 * With DEBUG_ERROR enabled, the dequantized coefficients are additionally
 * inverse-transformed and the error against the residual is printed.
 */
static void super_block_yrd_32x32(MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  int backup) {
  MACROBLOCKD *const xd = &x->e_mbd;
  SUPERBLOCK *const sb_enc = &x->sb_coeff_data;
  SUPERBLOCKD *const sb_dec = &xd->sb_coeff_data;
#if DEBUG_ERROR
  int16_t out[1024];
#endif

  vp9_transform_sby_32x32(x);
  vp9_quantize_sby_32x32(x);
#if DEBUG_ERROR
  vp9_short_idct32x32(sb_dec->dqcoeff, out, 64);
#endif

  *distortion = vp9_sb_block_error_c(sb_enc->coeff, sb_dec->dqcoeff, 1024);
#if DEBUG_ERROR
  printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
         vp9_block_error_c(sb_enc->src_diff, out, 1024), *distortion);
#endif

  /* Rate first, then the skip flag, as in the original sequence. */
  *rate = rdcost_sby_32x32(x, backup);
  *skippable = vp9_sby_is_skippable_32x32(xd);
}

794
795
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
796
                            int *skip,
797
                            int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
798
  MACROBLOCKD *const xd = &x->e_mbd;
799
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
800
801
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
802
803
804
805
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
                        *orig_left = xd->left_context;
806

807
  for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
808
809
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
810
    r[n][0] = 0;
811
812
813
    d[n] = 0;
    s[n] = 1;
  }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
814

815
816
  vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
                       dst, dst_y_stride);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
817
  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
818
819
820
821

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
822
823
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
824
    int r_tmp, d_tmp, s_tmp;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
825

826
    vp9_subtract_mby_s_c(x->src_diff,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
827
828
829
830
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);
831
832
833

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
834
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
835
    d[TX_16X16] += d_tmp;
836
    r[TX_16X16][0] += r_tmp;
837
    s[TX_16X16] = s[TX_16X16] && s_tmp;
838
839
840
841
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
842
843
844

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
845
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
846
    d[TX_4X4] += d_tmp;
847
    r[TX_4X4][0] += r_tmp;
848
    s[TX_4X4] = s[TX_4X4] && s_tmp;
849
850
851
852
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
853
854
855

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
856
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
857
    d[TX_8X8] += d_tmp;
858
    r[TX_8X8][0] += r_tmp;
859
    s[TX_8X8] = s[TX_8X8] && s_tmp;
860
861
862
863
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
864
  }
865
866
867
868
869
870
871
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);
872
873
874

  xd->above_context = orig_above;
  xd->left_context = orig_left;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
875
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971

/*
 * Luma rate/distortion evaluation for a 64x64 superblock.
 *
 * The 32x32 transform is measured on each of the four 32x32 quadrants, and
 * the 16x16, 8x8 and 4x4 transforms on each of the sixteen 16x16 macroblocks
 * (4x4 layout). Rates and distortions are summed per transform size and the
 * skip flag survives only if every sub-block is skippable. Each transform
 * size works on its own scratch copy of the above/left entropy contexts, and
 * xd's context pointers are restored before returning. The per-size tables
 * are handed to choose_txfm_size_from_rd() with TX_SIZE_MAX_SB - 1 as the
 * largest allowed size.
 */
static void super_block_64_yrd(VP9_COMP *cpi,
                               MACROBLOCK *x, int *rate, int *distortion,
                               int *skip,
                               int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const uint8_t *const src = x->src.y_buffer;
  const uint8_t *const dst = xd->dst.y_buffer;
  const int src_y_stride = x->src.y_stride;
  const int dst_y_stride = xd->dst.y_stride;
  ENTROPY_CONTEXT_PLANES *const saved_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES *const saved_left = xd->left_context;
  ENTROPY_CONTEXT_PLANES above_ctx[TX_SIZE_MAX_SB][4];
  ENTROPY_CONTEXT_PLANES left_ctx[TX_SIZE_MAX_SB][4];
  int rates[TX_SIZE_MAX_SB][2], dists[TX_SIZE_MAX_SB], skips[TX_SIZE_MAX_SB];
  int tx, quad, mb;
#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif

  /* Scratch contexts and zeroed accumulators for every transform size. */
  for (tx = TX_4X4; tx < TX_SIZE_MAX_SB; tx++) {
    vpx_memcpy(above_ctx[tx], xd->above_context, sizeof(above_ctx[tx]));
    vpx_memcpy(left_ctx[tx], xd->left_context, sizeof(left_ctx[tx]));
    rates[tx][0] = 0;
    dists[tx] = 0;
    skips[tx] = 1;
  }

  /* 32x32 transform on each quadrant. */
  for (quad = 0; quad < 4; quad++) {
    const int col = quad & 1;
    const int row = quad >> 1;
    int r_sb, d_sb, s_sb;

    /* A quadrant spans two context entries, hence the index doubling. */
    xd->above_context = &above_ctx[TX_32X32][col << 1];
    xd->left_context = &left_ctx[TX_32X32][row << 1];
    vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
                         src + 32 * col + 32 * row * src_y_stride,
                         src_y_stride,
                         dst + 32 * col + 32 * row * dst_y_stride,
                         dst_y_stride);
    super_block_yrd_32x32(x, &r_sb, &d_sb, &s_sb, 0);
    rates[TX_32X32][0] += r_sb;
    dists[TX_32X32] += d_sb;
    skips[TX_32X32] = skips[TX_32X32] && s_sb;
  }

  /* 16x16, 4x4 and 8x8 transforms on each macroblock, in that order. */
  for (mb = 0; mb < 16; mb++) {
    const int col = mb & 3;
    const int row = mb >> 2;
    const uint8_t *const mb_src = src + col * 16 + row * 16 * src_y_stride;
    const uint8_t *const mb_dst = dst + col * 16 + row * 16 * dst_y_stride;
    int r_mb, d_mb, s_mb;

    vp9_subtract_mby_s_c(x->src_diff, mb_src, src_y_stride,
                         mb_dst, dst_y_stride);

    xd->above_context = &above_ctx[TX_16X16][col];
    xd->left_context = &left_ctx[TX_16X16][row];
    macro_block_yrd_16x16(x, &r_mb, &d_mb, &s_mb, 0);
    dists[TX_16X16] += d_mb;
    rates[TX_16X16][0] += r_mb;
    skips[TX_16X16] = skips[TX_16X16] && s_mb;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &above_ctx[TX_4X4][col];
    xd->left_context = &left_ctx[TX_4X4][row];
    macro_block_yrd_4x4(x, &r_mb, &d_mb, &s_mb, 0);
    dists[TX_4X4] += d_mb;
    rates[TX_4X4][0] += r_mb;
    skips[TX_4X4] = skips[TX_4X4] && s_mb;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &above_ctx[TX_8X8][col];
    xd->left_context = &left_ctx[TX_8X8][row];
    macro_block_yrd_8x8(x, &r_mb, &d_mb, &s_mb, 0);
    dists[TX_8X8] += d_mb;
    rates[TX_8X8][0] += r_mb;
    skips[TX_8X8] = skips[TX_8X8] && s_mb;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
  }
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], dists[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], dists[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], dists[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, rates, rate, dists, distortion, skips, skip,
                           txfm_cache, TX_SIZE_MAX_SB - 1);

  /* Point xd back at the caller's entropy contexts. */
  xd->above_context = saved_above;
  xd->left_context = saved_left;
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
972

973
/*
 * Copies an 8x8 block of bytes from predictor to dst.
 *
 * Both buffers are addressed with a fixed row pitch of 16 bytes; only the
 * first 8 bytes of each of the 8 rows are written, the rest of dst is left
 * untouched.
 *
 * The copy is done byte by byte rather than through unsigned int pointers:
 * casting uint8_t buffers to a wider type breaks the strict-aliasing rule,
 * requires the buffers to be suitably aligned, and ties the amount copied to
 * sizeof(unsigned int).
 */
static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
  int row, col;

  for (row = 0; row < 8; row++) {
    uint8_t *const out = dst + row * 16;
    const uint8_t *const in = predictor + row * 16;

    for (col = 0; col < 8; col++) {
      out[col] = in[col];
    }
  }
}

994
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
995
996
997
998
999
                                     BLOCKD *b, B_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
1000
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
1001
  MACROBLOCKD *xd = &x->e_mbd;
1002
  int64_t best_rd = INT64_MAX;