/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16
17
#include "vp9/common/vp9_pragmas.h"

18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
37

38
39
40
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
41
#include "vp9_rtcd.h"
42
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
43
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45
46
/* Larger of two values.  NOTE: each argument is evaluated twice, so do not
 * pass expressions with side effects. */
#define MAXF(a,b)            (((a) > (b)) ? (a) : (b))

#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/* Table of 17 threshold values.  It is not referenced in the visible part of
 * this file; NOTE(review): presumably indexed by encoder speed setting --
 * confirm against its users. */
static const int auto_speed_thresh[17] = {
  1000,
  200,
  150,
  130,
  150,
  125,
  120,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  105
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73
74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76
77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79
80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85
86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88
89
90
91
92
93
94
95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99
100
101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103
104
105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

107
108
  {B_PRED,    INTRA_FRAME,  NONE},
  {I8X8_PRED, INTRA_FRAME,  NONE},
109

John Koleszar's avatar
John Koleszar committed
110
111
112
113
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115
116
117
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119
120
121
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123
124
125
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
126

John Koleszar's avatar
John Koleszar committed
127
128
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
148
149
};

150
151
152
/* Fills the token cost table `c` from the coefficient probabilities `p`.
 *
 * Iterates over block_type_counts block types, REF_TYPES reference types,
 * COEF_BANDS bands and PREV_COEF_CONTEXTS contexts.  For context 0 in any
 * band other than band 0 the costs are computed with
 * vp9_cost_tokens_skip(); every other entry uses vp9_cost_tokens().
 * Both are given vp9_coef_tree as the token tree. */
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
                             int block_type_counts) {
  int i, j, k, l;

  for (i = 0; i < block_type_counts; i++) {
    for (j = 0; j < REF_TYPES; j++) {
      for (k = 0; k < COEF_BANDS; k++) {
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          if (l == 0 && k > 0)
            vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
                                 p[i][j][k][l],
                                 vp9_coef_tree);
          else
            vp9_cost_tokens((int *)(c[i][j][k][l]),
                            p[i][j][k][l],
                            vp9_coef_tree);
        }
      }
    }
  }
}

170

171
172
173
174
/* Adjustment factors used by vp9_initialize_rd_consts(): on second-pass
 * non-key frames RDMULT is increased by (RDMULT * rd_iifactor[n]) >> 4,
 * where n is cpi->twopass.next_iiratio (values above 31 use entry 31).
 * Only the first five entries are non-zero. */
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
175

176
// 3* dc_qlookup[Q]*dc_qlookup[Q];
177

178
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
179
180
181
/* SAD-per-bit lookup tables, one entry per quantizer index.  Filled by
 * vp9_init_me_luts() and read by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/* Populates both SAD-per-bit tables for every index in [0, QINDEX_RANGE). */
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
    sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
194

195
/* Returns the base rate-distortion multiplier for a quantizer index:
 * (11 * q * q) >> 6, where q = vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);

  return (11 * q * q) >> 6;
}

202
/* Copies the SAD-per-bit values for QIndex from the lookup tables into
 * cpi->mb.  QIndex is used as a direct table index and is not range-checked
 * here; the tables have QINDEX_RANGE entries. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex) {
  cpi->mb.sadperbit16 =  sad_per_bit16lut[QIndex];
  cpi->mb.sadperbit4  =  sad_per_bit4lut[QIndex];
}

207

208
/* Sets up the rate-distortion constants on `cpi` for quantizer index QIndex.
 *
 * In order, this:
 *   - clamps QIndex to [0, MAXQ] and derives cpi->RDMULT from it,
 *   - on second-pass non-key frames boosts RDMULT using rd_iifactor[],
 *   - enforces a minimum RDMULT of 7 and derives cpi->mb.errorperbit,
 *   - calls vp9_set_speed_features() (which must run before the
 *     thresh_mult values below are read),
 *   - scales RDMULT, chooses RDDIV, and fills cpi->rd_threshes[] and
 *     cpi->rd_baseline_thresh[] for all MAX_MODES modes,
 *   - rebuilds the token cost tables for every transform size, the mode
 *     costs, and (for non-key frames) the motion vector cost table. */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int QIndex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  QIndex = (QIndex < 0) ? 0 : ((QIndex > MAXQ) ? MAXQ : QIndex);

  cpi->RDMULT = compute_rd_mult(QIndex);

  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // Indices above 31 are clamped to the last table entry.
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
        (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }

  if (cpi->RDMULT < 7)
    cpi->RDMULT = 7;

  cpi->mb.errorperbit = (cpi->RDMULT / 110);
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(QIndex, 0) >> 2, 1.25);
  q = q << 2;
  cpi->RDMULT = cpi->RDMULT << 4;

  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    // Large multiplier: fold the factor of 100 into RDMULT and the
    // thresholds instead of RDDIV.
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // Saturate rather than overflow when thresh_mult * q is too large.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }

      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, BLOCK_TYPES);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, BLOCK_TYPES);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, BLOCK_TYPES);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, BLOCK_TYPES_32X32);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

294
/* Returns the sum of squared differences between the first block_size
 * entries of coeff and dqcoeff.
 *
 * The sum is accumulated in 64 bits so that large differences cannot
 * trigger signed-int overflow (undefined behaviour); a total that does not
 * fit in an int is saturated to INT_MAX, as vp9_sb_block_error_c() does.
 * Results are unchanged for every input whose sum fits in an int. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

305
/* Returns the sum of squared differences between coeff and dqcoeff over the
 * first 16 blocks of the macroblock, 16 coefficients per block. */
int vp9_mbblock_error_c(MACROBLOCK *mb) {
  int i, j;
  int error = 0;

  for (i = 0; i < 16; i++) {
    const BLOCK *const be = &mb->block[i];
    const BLOCKD *const bd = &mb->e_mbd.block[i];
    int berror = 0;

    for (j = 0; j < 16; j++) {
      const int this_diff = be->coeff[j] - bd->dqcoeff[j];
      berror += this_diff * this_diff;
    }
    error += berror;
  }
  return error;
}

324
/* Returns the summed vp9_block_error_c() result for blocks 16..23 of the
 * macroblock, 16 coefficients per block. */
int vp9_mbuverror_c(MACROBLOCK *mb) {
  BLOCK  *be;
  BLOCKD *bd;
  int i, error = 0;

  for (i = 16; i < 24; i++) {
    be = &mb->block[i];
    bd = &mb->e_mbd.block[i];

    error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16);
  }

  return error;
}

340
/* Returns the combined 8x8 SSE of blocks 16 and 20 against the reference
 * frame's U and V buffers at the position given by the macroblock's first
 * motion vector.
 *
 * The motion vector is rounded away from zero and halved before use.  When
 * it has a fractional part (low three bits set in either component) the
 * sub-pixel variance function is used; otherwise the whole-pixel one is. */
int vp9_uvsse(MACROBLOCK *x) {
  uint8_t *uptr, *vptr;
  // Despite their names these point into the source (base_src + src).
  uint8_t *upred_ptr = (*(x->block[16].base_src) + x->block[16].src);
  uint8_t *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src);
  int uv_stride = x->block[16].src_stride;

  unsigned int sse1 = 0;
  unsigned int sse2 = 0;
  int mv_row = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.row;
  int mv_col = x->e_mbd.mode_info_context->mbmi.mv[0].as_mv.col;
  int offset;
  int pre_stride = x->e_mbd.block[16].pre_stride;

  // Round away from zero, then halve.
  if (mv_row < 0)
    mv_row -= 1;
  else
    mv_row += 1;

  if (mv_col < 0)
    mv_col -= 1;
  else
    mv_col += 1;

  mv_row /= 2;
  mv_col /= 2;

  // Whole-pixel part selects the position in the reference buffers.
  offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
  uptr = x->e_mbd.pre.u_buffer + offset;
  vptr = x->e_mbd.pre.v_buffer + offset;

  if ((mv_row | mv_col) & 7) {
    vp9_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1,
                              (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2);
    vp9_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1,
                              (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1);
  } else {
    vp9_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2);
    vp9_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1);
  }

  sse2 += sse1;
  return sse2;
}

385
386
387
388
389
/* Returns the token cost of the quantized coefficients of block `b` coded
 * with transform size `tx_size`, and updates the entropy contexts.
 *
 *   mb       encoder macroblock; supplies the token cost tables.
 *   b        block whose eob / qcoeff are costed.
 *   type     plane type; selects the cost table and, for
 *            PLANE_TYPE_Y_WITH_DC, the transform type lookup.
 *   a, l     above / left entropy contexts for this block.  They are read
 *            to form the initial context and overwritten on return with
 *            (eob > 0) for every context position the transform covers.
 *   tx_size  TX_4X4 .. TX_32X32; any other value calls abort().
 *
 * a1 / l1 address the context one ENTROPY_CONTEXT_PLANES further on from
 * a / l; they are used for 16x16 UV and for 32x32 transforms.
 * NOTE(review): presumably the neighbouring macroblock's contexts. */
static INLINE int cost_coeffs(MACROBLOCK *mb,
                              BLOCKD *b, PLANE_TYPE type,
                              ENTROPY_CONTEXT *a,
                              ENTROPY_CONTEXT *l,
                              TX_SIZE tx_size) {
  int pt;
  const int eob = b->eob;
  MACROBLOCKD *xd = &mb->e_mbd;
  const int ib = (int)(b - xd->block);
  int c = 0;
  int cost = 0, seg_eob;
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
  const int *scan;
  int16_t *qcoeff_ptr = b->qcoeff;
  const int ref = xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME;
  const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
                          get_tx_type(xd, b) : DCT_DCT;
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs[tx_size][type][ref];
  ENTROPY_CONTEXT a_ec = *a, l_ec = *l;
  ENTROPY_CONTEXT *const a1 = a +
      sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
  ENTROPY_CONTEXT *const l1 = l +
      sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);

  // Pick the scan order, maximum eob, coefficient source and the combined
  // above/left context flags for this transform size.
  switch (tx_size) {
    case TX_4X4:
      scan = vp9_default_zig_zag1d_4x4;
      seg_eob = 16;
      if (type == PLANE_TYPE_Y_WITH_DC) {
        if (tx_type == ADST_DCT) {
          scan = vp9_row_scan_4x4;
        } else if (tx_type == DCT_ADST) {
          scan = vp9_col_scan_4x4;
        }
      }
      break;
    case TX_8X8:
      a_ec = (a[0] + a[1]) != 0;
      l_ec = (l[0] + l[1]) != 0;
      scan = vp9_default_zig_zag1d_8x8;
      seg_eob = 64;
      break;
    case TX_16X16:
      scan = vp9_default_zig_zag1d_16x16;
      seg_eob = 256;
      if (type == PLANE_TYPE_UV) {
        // 16x16 chroma coefficients are read from the superblock buffer.
        const int uv_idx = ib - 16;
        qcoeff_ptr = xd->sb_coeff_data.qcoeff + 1024 + 64 * uv_idx;
        a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
        l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
      } else {
        a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
        l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
      }
      break;
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
      seg_eob = 1024;
      qcoeff_ptr = xd->sb_coeff_data.qcoeff;
      a_ec = (a[0] + a[1] + a[2] + a[3] +
              a1[0] + a1[1] + a1[2] + a1[3]) != 0;
      l_ec = (l[0] + l[1] + l[2] + l[3] +
              l1[0] + l1[1] + l1[2] + l1[3]) != 0;
      break;
    default:
      abort();
      break;
  }

  VP9_COMBINEENTROPYCONTEXTS(pt, a_ec, l_ec);

  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;

  {
    int recent_energy = 0;
    for (; c < eob; c++) {
      int v = qcoeff_ptr[scan[c]];
      int t = vp9_dct_value_tokens_ptr[v].Token;
      cost += token_costs[get_coef_band(tx_size, c)][pt][t];
      cost += vp9_dct_value_cost_ptr[v];
      pt = vp9_get_coef_context(&recent_energy, t);
    }
    // Charge an end-of-block token only when the block ended early.
    if (c < seg_eob)
      cost += token_costs[get_coef_band(tx_size, c)][pt][DCT_EOB_TOKEN];
  }

  // is eob first coefficient;
  pt = (c > 0);
  *a = *l = pt;
  if (tx_size >= TX_8X8) {
    a[1] = l[1] = pt;
    if (tx_size >= TX_16X16) {
      if (type == PLANE_TYPE_UV) {
        a1[0] = a1[1] = l1[0] = l1[1] = pt;
      } else {
        a[2] = a[3] = l[2] = l[3] = pt;
        if (tx_size >= TX_32X32) {
          a1[0] = a1[1] = a1[2] = a1[3] = pt;
          l1[0] = l1[1] = l1[2] = l1[3] = pt;
        }
      }
    }
  }
  return cost;
}

494
/* Returns the summed coefficient cost of the 16 luma blocks of the
 * macroblock using 4x4 transforms.
 *
 * When `backup` is non-zero the above/left entropy contexts are copied to
 * locals first, so the real contexts in xd are left untouched; otherwise
 * cost_coeffs() updates them in place. */
static int rdcost_mby_4x4(MACROBLOCK *mb, int backup) {
  int cost = 0;
  int b;
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  for (b = 0; b < 16; b++)
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
                        ta + vp9_block2above[TX_4X4][b],
                        tl + vp9_block2left[TX_4X4][b],
                        TX_4X4);

  return cost;
}

522
523
524
/* Transforms and quantizes the macroblock's luma with 4x4 transforms and
 * reports the result.
 *
 *   Rate        receives rdcost_mby_4x4(mb, backup).
 *   Distortion  receives vp9_mbblock_error(mb) >> 2.
 *   skippable   receives vp9_mby_is_skippable_4x4(xd).
 *   backup      passed through to rdcost_mby_4x4().
 *
 * Side effect: sets mbmi.txfm_size to TX_4X4. */
static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_mby_4x4(mb);
  vp9_quantize_mby_4x4(mb);

  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_4x4(mb, backup);
  *skippable = vp9_mby_is_skippable_4x4(xd);
}
John Koleszar's avatar
John Koleszar committed
536

537
/* Returns the summed coefficient cost of the macroblock's luma using 8x8
 * transforms (blocks 0, 4, 8 and 12).
 *
 * When `backup` is non-zero the above/left entropy contexts are copied to
 * locals first, so the real contexts in xd are left untouched; otherwise
 * cost_coeffs() updates them in place. */
static int rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
  int cost = 0;
  int b;
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  for (b = 0; b < 16; b += 4)
    cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_WITH_DC,
                        ta + vp9_block2above[TX_8X8][b],
                        tl + vp9_block2left[TX_8X8][b],
                        TX_8X8);

  return cost;
}

John Koleszar's avatar
John Koleszar committed
565
566
567
/* Transforms and quantizes the macroblock's luma with 8x8 transforms and
 * reports the result.
 *
 *   Rate        receives rdcost_mby_8x8(mb, backup).
 *   Distortion  receives vp9_mbblock_error(mb) >> 2.
 *   skippable   receives vp9_mby_is_skippable_8x8(xd).
 *   backup      passed through to rdcost_mby_8x8().
 *
 * Side effect: sets mbmi.txfm_size to TX_8X8. */
static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                int *Rate,
                                int *Distortion,
                                int *skippable, int backup) {
  MACROBLOCKD *const xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
  vp9_transform_mby_8x8(mb);
  vp9_quantize_mby_8x8(mb);

  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_8x8(mb, backup);
  *skippable = vp9_mby_is_skippable_8x8(xd);
}
579

580
/* Returns the coefficient cost of the macroblock's luma coded as a single
 * 16x16 transform (block 0).
 *
 * When `backup` is non-zero the above/left entropy contexts are copied to
 * locals first, so the real contexts in xd are left untouched; otherwise
 * cost_coeffs() updates them in place. */
static int rdcost_mby_16x16(MACROBLOCK *mb, int backup) {
  MACROBLOCKD *xd = &mb->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta, *tl;

  if (backup) {
    vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  return cost_coeffs(mb, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_16X16);
}
600

Daniel Kang's avatar
Daniel Kang committed
601
/* Transforms and quantizes the macroblock's luma with a 16x16 transform and
 * reports the result.
 *
 *   Rate        receives rdcost_mby_16x16(mb, backup).
 *   Distortion  receives vp9_mbblock_error(mb) >> 2.
 *   skippable   receives vp9_mby_is_skippable_16x16(xd).
 *   backup      passed through to rdcost_mby_16x16().
 *
 * Unlike the 4x4 and 8x8 variants, this also runs vp9_optimize_mby_16x16()
 * when mb->optimize is set and the prediction mode is below I8X8_PRED.
 * Side effect: sets mbmi.txfm_size to TX_16X16. */
static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
                                  int *skippable, int backup) {
  MACROBLOCKD *xd = &mb->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;
  vp9_transform_mby_16x16(mb);
  vp9_quantize_mby_16x16(mb);
  // TODO(jingning) is it possible to quickly determine whether to force
  //                trailing coefficients to be zero, instead of running trellis
  //                optimization in the rate-distortion optimization loop?
  if (mb->optimize &&
      xd->mode_info_context->mbmi.mode < I8X8_PRED)
    vp9_optimize_mby_16x16(mb);

  *Distortion = vp9_mbblock_error(mb) >> 2;
  *Rate = rdcost_mby_16x16(mb, backup);
  *skippable = vp9_mby_is_skippable_16x16(xd);
}

620
/* Picks the transform size for the current macroblock from per-size
 * rate / distortion / skip results and records per-mode RD costs.
 *
 *   r              r[n][0] is the input rate for transform size n; r[n][1]
 *                  is filled here as r[n][0] plus the cost of the bits coded
 *                  against cm->prob_tx for that size.
 *   d, s           per-size distortion and skippable flags (inputs).
 *   rate, distortion, skip
 *                  receive the values for the chosen size; the rate comes
 *                  from r[..][1] only when cm->txfm_mode is TX_MODE_SELECT.
 *   txfm_cache     receives the RD cost for each transform mode.
 *   max_txfm_size  largest transform size that has valid entries in r/d/s.
 *
 * The chosen size is stored in mbmi->txfm_size.  rd[] is only filled up to
 * max_txfm_size, and rd[TX_16X16] is read unconditionally, so callers must
 * pass at least TX_16X16. */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = cm->mb_no_coeff_skip ?
                       vp9_get_pred_prob(cm, xd, PRED_MBSKIP) : 128;
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;

  // Size n costs one "one" bit per smaller size plus a terminating "zero"
  // bit, except that the largest allowed size needs no terminator.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  if (cm->mb_no_coeff_skip) {
    int s0, s1;

    assert(skip_prob > 0);
    s0 = vp9_cost_bit(skip_prob, 0);
    s1 = vp9_cost_bit(skip_prob, 1);

    for (n = TX_4X4; n <= max_txfm_size; n++) {
      if (s[n]) {
        // Skippable: only the skip flag is charged as rate.
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
      }
    }
  } else {
    for (n = TX_4X4; n <= max_txfm_size; n++) {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0], d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1], d[n]);
    }
  }

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if ( cm->txfm_mode == ALLOW_16X16 ||
             (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
             (cm->txfm_mode == TX_MODE_SELECT &&
              rd[TX_16X16][1] < rd[TX_8X8][1] &&
              rd[TX_16X16][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}

/* Evaluates the macroblock's luma with 16x16, 8x8 and 4x4 transforms and
 * lets choose_txfm_size_from_rd() pick among them (maximum TX_16X16).
 *
 * The luma residual is computed once up front.  Each per-size evaluation is
 * run with backup = 1, so the entropy contexts in xd are not modified.
 * rate, distortion, skippable and txfm_cache are filled by
 * choose_txfm_size_from_rd(). */
static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int *distortion, int *skippable,
                            int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_MB][2], d[TX_SIZE_MAX_MB], s[TX_SIZE_MAX_MB];

  vp9_subtract_mby(x->src_diff, *(x->block[0].base_src), xd->predictor,
                   x->block[0].src_stride);

  macro_block_yrd_16x16(x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], 1);
  macro_block_yrd_8x8(x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], 1);
  macro_block_yrd_4x4(x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], 1);

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
                           txfm_cache, TX_16X16);
}

722
/* Copies a 4x4 block of bytes from predictor to dst; both use a row stride
 * of 16 bytes.
 *
 * The copy is done byte by byte rather than through an `unsigned int *`
 * cast, which avoided neither the strict-aliasing rule nor alignment
 * requirements.  The bytes copied are the same as before on platforms where
 * unsigned int is 4 bytes wide. */
static void copy_predictor(uint8_t *dst, const uint8_t *predictor) {
  int row, col;

  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      dst[row * 16 + col] = predictor[row * 16 + col];
    }
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
730

Ronald S. Bultje's avatar
Ronald S. Bultje committed
731
/* Returns the coefficient cost reported by cost_coeffs() for the luma
 * plane of the current superblock with the 32x32 transform.
 *
 * backup != 0: the above/left entropy contexts are first copied into local
 *              scratch storage and cost_coeffs() is given pointers to the
 *              copies, so anything it writes through them does not reach
 *              xd->above_context / xd->left_context.
 * backup == 0: cost_coeffs() is given xd's contexts directly.
 */
static int rdcost_sby_32x32(MACROBLOCK *x, int backup) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
  ENTROPY_CONTEXT *ta, *tl;

  if (backup) {
    /* Two ENTROPY_CONTEXT_PLANES entries are copied in each direction. */
    vpx_memcpy(t_above, xd->above_context, sizeof(t_above));
    vpx_memcpy(t_left, xd->left_context, sizeof(t_left));

    ta = (ENTROPY_CONTEXT *)t_above;
    tl = (ENTROPY_CONTEXT *)t_left;
  } else {
    ta = (ENTROPY_CONTEXT *)xd->above_context;
    tl = (ENTROPY_CONTEXT *)xd->left_context;
  }

  return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32);
}

750
751
/* Sum of squared differences between two coefficient arrays.
 *
 * coeff, dqcoeff: arrays of at least block_size entries each.
 * block_size:     number of entries to compare; values <= 0 give 0.
 *
 * Returns the sum of (coeff[i] - dqcoeff[i])^2, saturated to INT_MAX.
 *
 * The difference is widened to a signed 64-bit value before squaring, so
 * the result does not rely on unsigned wraparound at a particular int
 * width.
 */
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                                int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

/* Set to 1 to print forward/inverse transform round-trip diagnostics. */
#define DEBUG_ERROR 0

/* Luma rate/distortion evaluation of a superblock with the 32x32 transform.
 *
 * Expects the residual to be in x->sb_coeff_data.src_diff already. Runs the
 * 32x32 forward transform and quantizer, then writes:
 *   *distortion - squared error between the 1024 transform coefficients
 *                 and their dequantized values,
 *   *rate       - value returned by rdcost_sby_32x32(x, backup),
 *   *skippable  - value returned by vp9_sby_is_skippable_32x32().
 * backup is forwarded unchanged to rdcost_sby_32x32().
 */
static void super_block_yrd_32x32(MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  int backup) {
  MACROBLOCKD *const mbd = &x->e_mbd;
  SUPERBLOCK *const sb = &x->sb_coeff_data;
  SUPERBLOCKD *const sbd = &mbd->sb_coeff_data;
#if DEBUG_ERROR
  int16_t out[1024];
#endif

  vp9_transform_sby_32x32(x);
  vp9_quantize_sby_32x32(x);
#if DEBUG_ERROR
  vp9_short_idct32x32(sbd->dqcoeff, out, 64);
#endif

  *distortion = vp9_sb_block_error_c(sb->coeff, sbd->dqcoeff, 1024);
#if DEBUG_ERROR
  printf("IDCT/FDCT error 32x32: %d (d: %d)\n",
         vp9_block_error_c(sb->src_diff, out, 1024), *distortion);
#endif

  *rate = rdcost_sby_32x32(x, backup);
  *skippable = vp9_sby_is_skippable_32x32(mbd);
}

790
791
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
792
                            int *skip,
793
                            int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
794
  MACROBLOCKD *const xd = &x->e_mbd;
795
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
796
797
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
798
799
800
801
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_MB][2],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_MB][2],
                        *orig_left = xd->left_context;
802

803
  for (n = TX_4X4; n < TX_SIZE_MAX_MB; n++) {
804
805
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
806
    r[n][0] = 0;
807
808
809
    d[n] = 0;
    s[n] = 1;
  }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
810

811
812
  vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride,
                       dst, dst_y_stride);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
813
  super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1);
814
815
816
817

#if DEBUG_ERROR
  int err[3] = { 0, 0, 0 };
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
818
819
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
820
    int r_tmp, d_tmp, s_tmp;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
821

822
    vp9_subtract_mby_s_c(x->src_diff,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
823
824
825
826
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);
827
828
829

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
830
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
831
    d[TX_16X16] += d_tmp;
832
    r[TX_16X16][0] += r_tmp;
833
    s[TX_16X16] = s[TX_16X16] && s_tmp;
834
835
836
837
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
838
839
840

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
841
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
842
    d[TX_4X4] += d_tmp;
843
    r[TX_4X4][0] += r_tmp;
844
    s[TX_4X4] = s[TX_4X4] && s_tmp;
845
846
847
848
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
849
850
851

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
852
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
853
    d[TX_8X8] += d_tmp;
854
    r[TX_8X8][0] += r_tmp;
855
    s[TX_8X8] = s[TX_8X8] && s_tmp;
856
857
858
859
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
Ronald S. Bultje's avatar
Ronald S. Bultje committed
860
  }
861
862
863
864
865
866
867
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);
868
869
870

  xd->above_context = orig_above;
  xd->left_context = orig_left;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
871
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967

/* Luma rate/distortion evaluation for a 64x64 superblock.
 *
 * The block is evaluated as four 32x32 units (2x2 grid) with the 32x32
 * transform and as sixteen 16x16 macroblocks (4x4 grid) with the 16x16,
 * 8x8 and 4x4 transforms. Per-size rate, distortion and skip results are
 * accumulated and passed to choose_txfm_size_from_rd(), which writes
 * *rate, *distortion, *skip and txfm_cache[].
 *
 * Every transform size works on its own scratch copy of the above/left
 * entropy contexts; xd->above_context and xd->left_context are pointed
 * back at their entry values before returning.
 */
static void super_block_64_yrd(VP9_COMP *cpi,
                               MACROBLOCK *x, int *rate, int *distortion,
                               int *skip,
                               int64_t txfm_cache[NB_TXFM_MODES]) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n;
  const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
  ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4],
                        *orig_above = xd->above_context;
  ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4],
                        *orig_left = xd->left_context;
#if DEBUG_ERROR
  /* Round-trip error per macroblock-level transform size, indexed by TX_*.
   * Declared with the other locals so the debug build also compiles with
   * compilers that reject declarations after statements. */
  int err[TX_SIZE_MAX_SB] = { 0 };
#endif

  /* One private context copy and zeroed accumulators per transform size. */
  for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) {
    vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n]));
    vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n]));
    r[n][0] = 0;
    d[n] = 0;
    s[n] = 1;
  }

  /* 32x32 transform over the 2x2 grid of 32x32 units. backup=0 here, so
   * rdcost_sby_32x32() costs directly against the scratch contexts that
   * xd is pointed at; each unit spans two context entries per direction. */
  for (n = 0; n < 4; n++) {
    int x_idx = n & 1, y_idx = n >> 1;
    int r_tmp, d_tmp, s_tmp;

    xd->above_context = &t_above[TX_32X32][x_idx << 1];
    xd->left_context = &t_left[TX_32X32][y_idx << 1];
    vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff,
                         src + 32 * x_idx + 32 * y_idx * src_y_stride,
                         src_y_stride,
                         dst + 32 * x_idx + 32 * y_idx * dst_y_stride,
                         dst_y_stride);
    super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0);
    r[TX_32X32][0] += r_tmp;
    d[TX_32X32] += d_tmp;
    s[TX_32X32] = s[TX_32X32] && s_tmp;
  }

  /* 16x16, 4x4 and 8x8 transforms over the 4x4 grid of macroblocks. */
  for (n = 0; n < 16; n++) {
    int x_idx = n & 3, y_idx = n >> 2;
    int r_tmp, d_tmp, s_tmp;

    vp9_subtract_mby_s_c(x->src_diff,
                         src + x_idx * 16 + y_idx * 16 * src_y_stride,
                         src_y_stride,
                         dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
                         dst_y_stride);

    xd->above_context = &t_above[TX_16X16][x_idx];
    xd->left_context = &t_left[TX_16X16][y_idx];
    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_16X16] += d_tmp;
    r[TX_16X16][0] += r_tmp;
    s[TX_16X16] = s[TX_16X16] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_16x16(xd);
    err[TX_16X16] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_4X4][x_idx];
    xd->left_context = &t_left[TX_4X4][y_idx];
    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_4X4] += d_tmp;
    r[TX_4X4][0] += r_tmp;
    s[TX_4X4] = s[TX_4X4] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_4x4(xd);
    err[TX_4X4] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif

    xd->above_context = &t_above[TX_8X8][x_idx];
    xd->left_context = &t_left[TX_8X8][y_idx];
    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
    d[TX_8X8] += d_tmp;
    r[TX_8X8][0] += r_tmp;
    s[TX_8X8] = s[TX_8X8] && s_tmp;
#if DEBUG_ERROR
    vp9_inverse_transform_mby_8x8(xd);
    err[TX_8X8] += vp9_block_error_c(xd->diff, x->src_diff, 256);
#endif
  }
#if DEBUG_ERROR
  printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[TX_16X16], d[TX_16X16]);
  printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[TX_8X8], d[TX_8X8]);
  printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[TX_4X4], d[TX_4X4]);
#endif
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
                           TX_SIZE_MAX_SB - 1);

  /* Undo the redirection to the scratch contexts. */
  xd->above_context = orig_above;
  xd->left_context = orig_left;
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
968

969
/* Copies an 8x8 block of bytes from predictor to dst.
 *
 * Both buffers are addressed with a fixed row pitch of 16 bytes: bytes
 * 0..7 of rows 0..7 are copied, everything else in dst is left untouched.
 *
 * The copy is done byte-wise rather than through an unsigned int pointer
 * so that it does not depend on the buffers being 4-byte aligned, on
 * sizeof(unsigned int) being 4, or on type-punning uint8_t storage.
 */
static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
  const int pitch = 16;
  int row, col;

  for (row = 0; row < 8; row++) {
    const uint8_t *const from = predictor + row * pitch;
    uint8_t *const to = dst + row * pitch;

    for (col = 0; col < 8; col++)
      to[col] = from[col];
  }
}

990
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
991
992
993
994
995
                                     BLOCKD *b, B_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
John Koleszar's avatar
John Koleszar committed
996
  B_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
997
  MACROBLOCKD *xd = &x->e_mbd;
998
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
999
1000
1001
1002
1003
  int rate = 0;
  int distortion;

  ENTROPY_CONTEXT ta = *a, tempa = *a;
  ENTROPY_CONTEXT tl = *l, templ = *l;
Deb Mukherjee's avatar
Deb Mukherjee committed
1004