/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16
17
#include "vp9/common/vp9_pragmas.h"

18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
John Koleszar's avatar
John Koleszar committed
37

38
39
40
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
41
#include "vp9_rtcd.h"
42
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
43
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
44

John Koleszar's avatar
John Koleszar committed
45
46
/* Larger of two values. Note: each argument may be evaluated twice. */
#define MAXF(a, b) ((a) > (b) ? (a) : (b))

#define INVALID_MV 0x80008000

/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/* Threshold table with 17 entries. */
static const int auto_speed_thresh[17] = {
  1000, 200, 150, 130, 150, 125, 120, 115,
  115,  115, 115, 115, 115, 115, 115, 115,
  105
};

72
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
73
74
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76
77
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
78

79
80
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85
86
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
87

88
89
90
91
92
93
94
95
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
96

97
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
98

99
100
101
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
102

103
104
105
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
106

Yaowu Xu's avatar
Yaowu Xu committed
107
  {I4X4_PRED,    INTRA_FRAME,  NONE},
108
  {I8X8_PRED, INTRA_FRAME,  NONE},
109

John Koleszar's avatar
John Koleszar committed
110
111
112
113
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115
116
117
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119
120
121
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123
124
125
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
126

John Koleszar's avatar
John Koleszar committed
127
128
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},

#if CONFIG_COMP_INTERINTRA_PRED
  /* compound inter-intra prediction */
  {ZEROMV,    LAST_FRAME,   INTRA_FRAME},
  {NEARESTMV, LAST_FRAME,   INTRA_FRAME},
  {NEARMV,    LAST_FRAME,   INTRA_FRAME},
  {NEWMV,     LAST_FRAME,   INTRA_FRAME},

  {ZEROMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEARESTMV, GOLDEN_FRAME,   INTRA_FRAME},
  {NEARMV,    GOLDEN_FRAME,   INTRA_FRAME},
  {NEWMV,     GOLDEN_FRAME,   INTRA_FRAME},

  {ZEROMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEARESTMV, ALTREF_FRAME,   INTRA_FRAME},
  {NEARMV,    ALTREF_FRAME,   INTRA_FRAME},
  {NEWMV,     ALTREF_FRAME,   INTRA_FRAME},
#endif
John Koleszar's avatar
John Koleszar committed
148
149
};

150
151
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
152
                             TX_SIZE tx_size) {
153
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
154

155
  for (i = 0; i < BLOCK_TYPES; i++)
156
157
158
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
159
160
161
162
163
164
          vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
                               p[i][j][k][l],
                               vp9_coef_tree);
        }
}

165
166
167
168
/* Adjustment factors applied to RDMULT in vp9_initialize_rd_consts(), indexed
 * by cpi->twopass.next_iiratio (capped at 31). Only the first five entries
 * are non-zero; the remaining entries are zero-initialized. */
static int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
169

170
// 3* dc_qlookup[Q]*dc_qlookup[Q];
171

172
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
173
174
175
// Per-qindex SAD-per-bit lookup tables. They are filled in by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

176
// Fills sad_per_bit16lut[] and sad_per_bit4lut[] for every qindex.
//
// The tables are derived from a linear formula of the real quantizer value
// (vp9_convert_qindex_to_q) rather than hard-coded, which makes it easier to
// resolve the impact of experimental changes to the quantizer tables.
void vp9_init_me_luts() {
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[qindex] = (int)((0.063 * q) + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
188

189
// Returns the base rate-distortion multiplier for `qindex`:
// (11 * q * q) >> 2, where q = vp9_dc_quant(qindex, 0).
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
194
195
196
// Loads the SAD-per-bit constants for `qindex` from the lookup tables into
// cpi->mb. `qindex` is used directly as the table index and is not
// range-checked here.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const x = &cpi->mb;

  x->sadperbit16 = sad_per_bit16lut[qindex];
  x->sadperbit4 = sad_per_bit4lut[qindex];
}

199

Dmitry Kovalev's avatar
Dmitry Kovalev committed
200
// Initializes the rate-distortion constants and cost tables in `cpi` for the
// given quantizer index:
//   - cpi->RDMULT / cpi->RDDIV and cpi->mb.errorperbit,
//   - per-mode thresholds cpi->rd_threshes[] / cpi->rd_baseline_thresh[],
//   - token, partition, mode and (for non-key frames) motion vector costs.
// `qindex` is clamped to [0, MAXQ] before use.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  if (qindex < 0)
    qindex = 0;
  else if (qindex > MAXQ)
    qindex = MAXQ;

  cpi->RDMULT = compute_rd_mult(qindex);

  // Second pass, non-key frames: scale RDMULT up by rd_iifactor / 16, with
  // the table index capped at its last entry.
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    cpi->RDMULT +=
        (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio > 31
                                       ? 31
                                       : cpi->twopass.next_iiratio]) >> 4;
  }

  // errorperbit is RDMULT / 64, but never zero.
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  if (cpi->mb.errorperbit == 0)
    cpi->mb.errorperbit = 1;

  vp9_set_speed_features(cpi);

  // Threshold scale factor derived from the DC quantizer; at least 8.
  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    // Large multiplier: fold the factor of 100 into RDMULT and the
    // thresholds instead of RDDIV.
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      cpi->rd_threshes[i] = (cpi->sf.thresh_mult[i] < INT_MAX)
                                ? cpi->sf.thresh_mult[i] * q / 100
                                : INT_MAX;
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // Saturate instead of overflowing the multiplication.
      cpi->rd_threshes[i] = (cpi->sf.thresh_mult[i] < (INT_MAX / q))
                                ? cpi->sf.thresh_mult[i] * q
                                : INT_MAX;
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  // Coefficient token costs, one table per transform size.
  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (i = 0; i < 2; i++) {
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);
  }

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

282
// Returns the sum of squared differences between `coeff` and `dqcoeff` over
// the first `block_size` entries.
//
// The sum is accumulated in 64 bits and saturated to INT_MAX, so large
// differences cannot cause signed integer overflow. Results are unchanged for
// every input whose true sum fits in an int. A non-positive `block_size`
// yields 0.
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    // Widen before squaring: |this_diff| can reach 65535, whose square does
    // not fit in a 32-bit int.
    error += (int64_t)this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

293
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
294
                              int ib, PLANE_TYPE type,
295
296
                              ENTROPY_CONTEXT *a,
                              ENTROPY_CONTEXT *l,
John Koleszar's avatar
John Koleszar committed
297
298
                              TX_SIZE tx_size,
                              int y_blocks) {
299
  MACROBLOCKD *const xd = &mb->e_mbd;
300
301
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
302
  int c = 0;
303
304
  int cost = 0, pad;
  const int *scan, *nb;
John Koleszar's avatar
John Koleszar committed
305
306
  const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
  const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block];
307
308
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
                                           pb_idx.block, 16);
309
  const int ref = mbmi->ref_frame != INTRA_FRAME;
310
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
311
      mb->token_costs[tx_size][type][ref];
312
  ENTROPY_CONTEXT a_ec, l_ec;
313
314
315
316
  ENTROPY_CONTEXT *const a1 = a +
      sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
  ENTROPY_CONTEXT *const l1 = l +
      sizeof(ENTROPY_CONTEXT_PLANES)/sizeof(ENTROPY_CONTEXT);
317
  TX_TYPE tx_type = DCT_DCT;
318

319
320
321
322
323
324
325
326
#if CONFIG_CODE_ZEROGROUP
  int last_nz_pos[3] = {-1, -1, -1};  // Encoder only
  int is_eoo_list[3] = {0, 0, 0};
  int is_eoo_negative[3] = {0, 0, 0};
  int is_last_zero[3] = {0, 0, 0};
  int o, rc, skip_coef_val;
  vp9_zpc_probs *zpc_probs;
  uint8_t token_cache_full[1024];
327
#endif
328
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
329
330
331
332
  vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                        [ENTROPY_NODES];
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
333

334
335
336
337
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache, UNKNOWN_TOKEN, sizeof(token_cache));
#endif

338
  // Check for consistency of tx_size with mode info
John Koleszar's avatar
John Koleszar committed
339
  assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
340
341
342
343
344
345
346
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

347
  switch (tx_size) {
348
    case TX_4X4: {
349
350
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_4x4(xd, ib) : DCT_DCT;
351
352
      a_ec = *a;
      l_ec = *l;
353
      coef_probs = cm->fc.coef_probs_4x4;
354
      seg_eob = 16;
355
356
357
358
359
360
      if (tx_type == ADST_DCT) {
        scan = vp9_row_scan_4x4;
      } else if (tx_type == DCT_ADST) {
        scan = vp9_col_scan_4x4;
      } else {
        scan = vp9_default_zig_zag1d_4x4;
Daniel Kang's avatar
Daniel Kang committed
361
      }
362
363
364
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_4x4;
#endif
Daniel Kang's avatar
Daniel Kang committed
365
      break;
366
    }
367
368
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
369
370
      const int sz = 3 + mb_width_log2(sb_type);
      const int x = ib & ((1 << sz) - 1), y = ib - x;
371
372
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
373
374
      a_ec = (a[0] + a[1]) != 0;
      l_ec = (l[0] + l[1]) != 0;
375
376
377
378
379
380
381
      if (tx_type == ADST_DCT) {
        scan = vp9_row_scan_8x8;
      } else if (tx_type == DCT_ADST) {
        scan = vp9_col_scan_8x8;
      } else {
        scan = vp9_default_zig_zag1d_8x8;
      }
382
      coef_probs = cm->fc.coef_probs_8x8;
383
      seg_eob = 64;
384
385
386
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_8x8;
#endif
Daniel Kang's avatar
Daniel Kang committed
387
      break;
388
389
390
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
391
392
      const int sz = 4 + mb_width_log2(sb_type);
      const int x = ib & ((1 << sz) - 1), y = ib - x;
393
394
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
395
396
397
398
399
400
401
      if (tx_type == ADST_DCT) {
        scan = vp9_row_scan_16x16;
      } else if (tx_type == DCT_ADST) {
        scan = vp9_col_scan_16x16;
      } else {
        scan = vp9_default_zig_zag1d_16x16;
      }
402
      coef_probs = cm->fc.coef_probs_16x16;
403
      seg_eob = 256;
404
      if (type == PLANE_TYPE_UV) {
405
406
407
408
409
        a_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0;
        l_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0;
      } else {
        a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
        l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
410
      }
411
412
413
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_16x16;
#endif
Daniel Kang's avatar
Daniel Kang committed
414
      break;
415
    }
416
417
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
418
      coef_probs = cm->fc.coef_probs_32x32;
419
      seg_eob = 1024;
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
      if (type == PLANE_TYPE_UV) {
        ENTROPY_CONTEXT *a2, *a3, *l2, *l3;
        a2 = a1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
        a3 = a2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
        l2 = l1 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
        l3 = l2 + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT);
        a_ec = (a[0] + a[1] + a1[0] + a1[1] +
                a2[0] + a2[1] + a3[0] + a3[1]) != 0;
        l_ec = (l[0] + l[1] + l1[0] + l1[1] +
                l2[0] + l2[1] + l3[0] + l3[1]) != 0;
      } else {
        a_ec = (a[0] + a[1] + a[2] + a[3] +
                a1[0] + a1[1] + a1[2] + a1[3]) != 0;
        l_ec = (l[0] + l[1] + l[2] + l[3] +
                l1[0] + l1[1] + l1[2] + l1[3]) != 0;
      }
436
437
438
#if CONFIG_CODE_ZEROGROUP
      zpc_probs = &cm->fc.zpc_probs_32x32;
#endif
439
      break;
Daniel Kang's avatar
Daniel Kang committed
440
    default:
441
      abort();
Daniel Kang's avatar
Daniel Kang committed
442
443
      break;
  }
John Koleszar's avatar
John Koleszar committed
444
  assert(eob <= seg_eob);
445

446
  pt = combine_entropy_contexts(a_ec, l_ec);
447
448
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
449

450
451
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
452

453
454
455
456
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

457
458
459
460
461
462
463
464
465
466
#if CONFIG_CODE_ZEROGROUP
  vpx_memset(token_cache_full, ZERO_TOKEN, sizeof(token_cache_full));
  for (c = 0; c < eob; ++c) {
    rc = scan[c];
    token_cache_full[rc] = vp9_dct_value_tokens_ptr[qcoeff_ptr[rc]].token;
    o = vp9_get_orientation(rc, tx_size);
    if (qcoeff_ptr[rc] != 0)
      last_nz_pos[o] = c;
  }
#endif
467
  {
468
    for (c = 0; c < eob; c++) {
469
      int v = qcoeff_ptr[scan[c]];
470
      int t = vp9_dct_value_tokens_ptr[v].token;
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
      int band = get_coef_band(scan, tx_size, c);
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
#if CONFIG_CODE_ZEROGROUP
      rc = scan[c];
      o = vp9_get_orientation(rc, tx_size);
      skip_coef_val = (token_cache[rc] == ZERO_TOKEN || is_eoo_list[o]);
      if (!skip_coef_val) {
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
      } else {
        assert(v == 0);
      }
#else
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
#endif
486
487
      if (!c || token_cache[scan[c - 1]])
        cost += vp9_cost_bit(coef_probs[type][ref][band][pt][0], 1);
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
      token_cache[scan[c]] = t;
#if CONFIG_CODE_ZEROGROUP
      if (t == ZERO_TOKEN && !skip_coef_val) {
        int eoo = 0, use_eoo;
#if USE_ZPC_EOORIENT == 1
        use_eoo = vp9_use_eoo(c, seg_eob, scan, tx_size,
                              is_last_zero, is_eoo_list);
#else
        use_eoo = 0;
#endif
        if (use_eoo) {
          eoo = vp9_is_eoo(c, eob, scan, tx_size, qcoeff_ptr, last_nz_pos);
          if (eoo && is_eoo_negative[o]) eoo = 0;
          if (eoo) {
            int c_;
            int savings = 0;
            int zsaved = 0;
            savings = vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 1) -
                      vp9_cost_bit((*zpc_probs)[ref]
                                   [coef_to_zpc_band(band)]
                                   [coef_to_zpc_ptok(pt)][0], 0);
            for (c_ = c + 1; c_ < eob; ++c_) {
              if (o == vp9_get_orientation(scan[c_], tx_size)) {
                int pt_ = vp9_get_coef_context(scan, nb, pad,
                                               token_cache_full, c_,
                                               default_eob);
                int band_ = get_coef_band(scan, tx_size, c_);
                assert(token_cache_full[scan[c_]] == ZERO_TOKEN);
                if (!c_ || token_cache_full[scan[c_ - 1]])
                  savings += vp9_cost_bit(
                      coef_probs[type][ref][band_][pt_][0], 1);
                savings += vp9_cost_bit(
                    coef_probs[type][ref][band_][pt_][1], 0);
                zsaved++;
              }
            }
            if (savings < 0) {
            // if (zsaved < ZPC_ZEROSSAVED_EOO) {
              eoo = 0;
              is_eoo_negative[o] = 1;
            }
          }
        }
        if (use_eoo) {
          cost += vp9_cost_bit((*zpc_probs)[ref]
                                           [coef_to_zpc_band(band)]
                                           [coef_to_zpc_ptok(pt)][0], !eoo);
          if (eoo) {
            assert(is_eoo_list[o] == 0);
            is_eoo_list[o] = 1;
          }
        }
      }
      is_last_zero[o] = (t == ZERO_TOKEN);
544
#endif
545
    }
546
547
548
549
550
551
552
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
          [get_coef_band(scan, tx_size, c)]
          [pt][DCT_EOB_TOKEN];
    }
553
554
  }

555
556
    // is eob first coefficient;
    pt = (c > 0);
557
  *a = *l = pt;
558
559
560
561
562
563
564
565
566
567
568
569
570
571
  if (tx_size >= TX_8X8) {
    a[1] = l[1] = pt;
    if (tx_size >= TX_16X16) {
      if (type == PLANE_TYPE_UV) {
        a1[0] = a1[1] = l1[0] = l1[1] = pt;
      } else {
        a[2] = a[3] = l[2] = l[3] = pt;
        if (tx_size >= TX_32X32) {
          a1[0] = a1[1] = a1[2] = a1[3] = pt;
          l1[0] = l1[1] = l1[2] = l1[3] = pt;
        }
      }
    }
  }
572
573
574
  return cost;
}

575
// Picks the transform size for the current block from per-size
// rate/distortion results and records per-mode RD costs.
//
//   r[n][0]     in:  rate for transform size n without size signalling.
//   r[n][1]     out: r[n][0] plus the cost of signalling size n, built from
//                    cm->prob_tx[].
//   d[n]        in:  distortion for transform size n.
//   s[n]        in:  non-zero if size n is skippable.
//   rate, distortion, skip
//               out: values for the selected size.
//   txfm_cache  out: RD cost for each transform mode.
//   max_txfm_size    largest transform size considered.
//
// The selected size is stored in mbmi->txfm_size and depends on
// cm->txfm_mode; under TX_MODE_SELECT the size with the lowest signalled RD
// cost wins.
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;
  int select_32x32, select_16x16, select_8x8;

  // Add the transform-size signalling cost: a "one" for every size skipped
  // over, then a "zero" to stop (not needed at the largest size).
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    const int last_bit = n - (n == max_txfm_size);

    r[n][1] = r[n][0];
    for (m = 0; m <= last_bit; m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      // Skippable: only the skip flag is paid for, in both variants.
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  // Which size would win under TX_MODE_SELECT. rd[TX_32X32] is only read
  // when 32x32 was actually evaluated.
  select_32x32 = max_txfm_size == TX_32X32 &&
                 rd[TX_32X32][1] < rd[TX_16X16][1] &&
                 rd[TX_32X32][1] < rd[TX_8X8][1] &&
                 rd[TX_32X32][1] < rd[TX_4X4][1];
  select_16x16 = rd[TX_16X16][1] < rd[TX_8X8][1] &&
                 rd[TX_16X16][1] < rd[TX_4X4][1];
  select_8x8 = rd[TX_8X8][1] < rd[TX_4X4][1];

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT && select_32x32))) {
    mbmi->txfm_size = TX_32X32;
  } else if (cm->txfm_mode == ALLOW_16X16 ||
             (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
             (cm->txfm_mode == TX_MODE_SELECT && select_16x16)) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             (cm->txfm_mode == TX_MODE_SELECT && select_8x8)) {
    mbmi->txfm_size = TX_8X8;
  } else {
    assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  // The signalled rate only applies when the size is coded per block.
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
  txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
  if (select_32x32) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (select_16x16) {
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else {
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
  }
}

651
// Returns the sum of squared differences between `coeff` and `dqcoeff` over
// `block_size` entries, right-shifted by `shift` and saturated to INT_MAX.
static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                                int block_size, int shift) {
  int64_t sse = 0;
  int idx;

  for (idx = 0; idx < block_size; ++idx) {
    const int diff = coeff[idx] - dqcoeff[idx];
    // Square in 64 bits; |diff| can be as large as 65535.
    sse += (int64_t)diff * diff;
  }
  sse >>= shift;

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}

665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
// Sum of squared differences over two halves: the first block_size / 2
// entries of `coeff` are compared against `dqcoeff0` and the next
// block_size / 2 entries against `dqcoeff1`. The total is right-shifted by
// `shift` and saturated to INT_MAX.
static int vp9_sb_uv_block_error_c(int16_t *coeff,
                                   int16_t *dqcoeff0, int16_t *dqcoeff1,
                                   int block_size, int shift) {
  const int half = block_size / 2;
  const int16_t *const second_half = coeff + half;
  int64_t sse = 0;
  int idx;

  for (idx = 0; idx < half; ++idx) {
    const int diff = coeff[idx] - dqcoeff0[idx];
    sse += (int64_t)diff * diff;
  }
  for (idx = 0; idx < half; ++idx) {
    const int diff = second_half[idx] - dqcoeff1[idx];
    sse += (int64_t)diff * diff;
  }
  sse >>= shift;

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}

685
686
687
688
// Returns the total coefficient cost of the luma plane of a block of size
// `bsize` when coded with 4x4 transforms.
//
// The above/left entropy contexts are copied into local scratch arrays first,
// because cost_coeffs() overwrites the contexts it is handed; the contexts in
// xd are left untouched.
static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  const int bwl = mb_width_log2(bsize) + 2;
  const int bw = 1 << bwl;
  const int bh = 1 << (mb_height_log2(bsize) + 2);
  const int num_blocks = bw * bh;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
  int total = 0;
  int b;

  vpx_memcpy(&t_above, xd->above_context,
             (sizeof(ENTROPY_CONTEXT_PLANES) * bw) >> 2);
  vpx_memcpy(&t_left,  xd->left_context,
             (sizeof(ENTROPY_CONTEXT_PLANES) * bh) >> 2);

  for (b = 0; b < num_blocks; b++) {
    const int col = b & (bw - 1);
    const int row = b >> bwl;
    // Four 4x4 columns/rows share one ENTROPY_CONTEXT_PLANES entry.
    ENTROPY_CONTEXT *const above =
        ((ENTROPY_CONTEXT *) &t_above[col >> 2]) + (col & 3);
    ENTROPY_CONTEXT *const left =
        ((ENTROPY_CONTEXT *) &t_left[row >> 2]) + (row & 3);

    total += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
                         above, left, TX_4X4, num_blocks);
  }

  return total;
}

// Evaluates the luma plane of a block of size `bsize` with 4x4 transforms.
//
// Sets mbmi.txfm_size to TX_4X4, runs the 4x4 transform and quantizer, then
// reports:
//   *distortion  squared coefficient error (x->coeff vs. dequantized), >> 2
//   *rate        coefficient cost from rdcost_sby_4x4()
//   *skippable   result of vp9_sby_is_skippable()
static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable,
                                BLOCK_SIZE_TYPE bsize) {
  const int bwl = mb_width_log2(bsize) + 2;
  const int bhl = mb_height_log2(bsize) + 2;
  // 16 coefficients per 4x4 block, (1 << bwl) * (1 << bhl) blocks.
  const int num_coeffs = 16 << (bwl + bhl);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  vp9_transform_sby_4x4(x, bsize);
  vp9_quantize_sby_4x4(x, bsize);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, 2);
  *rate       = rdcost_sby_4x4(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

725
726
727
728
// Returns the total coefficient cost of the luma plane of a block of size
// `bsize` when coded with 8x8 transforms.
//
// The above/left entropy contexts are copied into local scratch arrays first,
// because cost_coeffs() overwrites the contexts it is handed; the contexts in
// xd are left untouched.
static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  const int bwl = mb_width_log2(bsize) + 1;
  const int bw = 1 << bwl;
  const int bh = 1 << (mb_height_log2(bsize) + 1);
  const int num_blocks = bw * bh;
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
  int total = 0;
  int b;

  vpx_memcpy(&t_above, xd->above_context,
             (sizeof(ENTROPY_CONTEXT_PLANES) * bw) >> 1);
  vpx_memcpy(&t_left,  xd->left_context,
             (sizeof(ENTROPY_CONTEXT_PLANES) * bh) >> 1);

  for (b = 0; b < num_blocks; b++) {
    const int col = b & (bw - 1);
    const int row = b >> bwl;
    // Two 8x8 columns/rows share one ENTROPY_CONTEXT_PLANES entry; each 8x8
    // block covers two context slots.
    ENTROPY_CONTEXT *const above =
        ((ENTROPY_CONTEXT *) &t_above[col >> 1]) + ((col & 1) << 1);
    ENTROPY_CONTEXT *const left =
        ((ENTROPY_CONTEXT *) &t_left[row >> 1]) + ((row & 1) << 1);

    // Block indices are passed in 4x4 units, hence the factor of 4.
    total += cost_coeffs(cm, x, b * 4, PLANE_TYPE_Y_WITH_DC,
                         above, left, TX_8X8, 4 * num_blocks);
  }

  return total;
}

// Evaluates the luma plane of a block of size `bsize` with 8x8 transforms.
//
// Sets mbmi.txfm_size to TX_8X8, runs the 8x8 transform and quantizer, then
// reports:
//   *distortion  squared coefficient error (x->coeff vs. dequantized), >> 2
//   *rate        coefficient cost from rdcost_sby_8x8()
//   *skippable   result of vp9_sby_is_skippable()
static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
                                int *rate, int *distortion, int *skippable,
                                BLOCK_SIZE_TYPE bsize) {
  const int bwl = mb_width_log2(bsize) + 1;
  const int bhl = mb_height_log2(bsize) + 1;
  // 64 coefficients per 8x8 block, (1 << bwl) * (1 << bhl) blocks.
  const int num_coeffs = 64 << (bhl + bwl);
  MACROBLOCKD *const xd = &x->e_mbd;

  xd->mode_info_context->mbmi.txfm_size = TX_8X8;
  vp9_transform_sby_8x8(x, bsize);
  vp9_quantize_sby_8x8(x, bsize);

  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, 2);
  *rate       = rdcost_sby_8x8(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

765
766
767
768
/* Accumulates cost_coeffs() over every 16x16 luma transform block of a block
 * of size bsize and returns the sum.
 *
 * The entropy contexts handed to cost_coeffs() point into local copies of
 * xd->above_context / xd->left_context, not into the originals.
 */
static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                            BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  /* Dimensions of the block counted in 16x16 units (log2 and linear). */
  const int cols_log2 = mb_width_log2(bsize);
  const int cols = 1 << cols_log2;
  const int rows = 1 << mb_height_log2(bsize);
  const int num_blocks = cols * rows;
  ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
  int total = 0;
  int blk;

  /* One ENTROPY_CONTEXT_PLANES entry per 16x16 column/row. */
  vpx_memcpy(&t_above, xd->above_context,
             sizeof(ENTROPY_CONTEXT_PLANES) * cols);
  vpx_memcpy(&t_left, xd->left_context,
             sizeof(ENTROPY_CONTEXT_PLANES) * rows);

  for (blk = 0; blk < num_blocks; ++blk) {
    /* Raster position of this 16x16 block inside the larger block. */
    const int col = blk & (cols - 1);
    const int row = blk >> cols_log2;

    total += cost_coeffs(cm, x, blk * 16, PLANE_TYPE_Y_WITH_DC,
                         (ENTROPY_CONTEXT *) &t_above[col],
                         (ENTROPY_CONTEXT *) &t_left[row],
                         TX_16X16, num_blocks * 16);
  }

  return total;
}

/* Evaluates the luma plane of a bsize block with the 16x16 transform.
 *
 * Sets the current mode info's txfm_size to TX_16X16, calls
 * vp9_transform_sby_16x16() and vp9_quantize_sby_16x16(), then writes:
 *   *distortion - vp9_sb_block_error_c() between x->coeff and the plane-0
 *                 dqcoeff buffer,
 *   *rate       - rdcost_sby_16x16(),
 *   *skippable  - vp9_sby_is_skippable().
 */
static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  /* log2 of the number of 16x16 blocks covering the block. */
  const int blocks_log2 = mb_width_log2(bsize) + mb_height_log2(bsize);
  /* 256 coefficients per 16x16 block. */
  const int num_coeffs = 256 << blocks_log2;

  xd->mode_info_context->mbmi.txfm_size = TX_16X16;

  vp9_transform_sby_16x16(x, bsize);
  vp9_quantize_sby_16x16(x, bsize);

  /* NOTE(review): the trailing 2 is presumably a scaling shift applied to
   * the error - confirm against vp9_sb_block_error_c(). */
  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     num_coeffs, 2);
  *rate = rdcost_sby_16x16(cm, x, bsize);
  *skippable = vp9_sby_is_skippable(xd, bsize);
}

803
804
805
806
807
/* Accumulates cost_coeffs() over every 32x32 luma transform block of a block
 * of size bsize and returns the sum.
 *
 * Precondition: bsize >= BLOCK_SIZE_SB32X32.  The block dimensions are
 * derived with "mb_*_log2(bsize) - 1" and used as shift counts, so a smaller
 * block size would produce a negative shift (undefined behaviour).
 *
 * The entropy contexts handed to cost_coeffs() point into local copies of
 * xd->above_context / xd->left_context, not into the originals.
 */
static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                            BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  ENTROPY_CONTEXT_PLANES t_above[4], t_left[4];
  int bwl, bw, bh;
  int cost = 0, b;

  /* Must hold before any of the shifts below are evaluated. */
  assert(bsize >= BLOCK_SIZE_SB32X32);

  /* Dimensions of the block counted in 32x32 units. */
  bwl = mb_width_log2(bsize) - 1;
  bw = 1 << bwl;
  bh = 1 << (mb_height_log2(bsize) - 1);

  /* Two ENTROPY_CONTEXT_PLANES entries per 32x32 column/row are copied;
   * make sure they fit in the 4-entry local arrays. */
  assert(bw * 2 <= 4 && bh * 2 <= 4);

  vpx_memcpy(&t_above, xd->above_context,
             sizeof(ENTROPY_CONTEXT_PLANES) * bw * 2);
  vpx_memcpy(&t_left,  xd->left_context,
             sizeof(ENTROPY_CONTEXT_PLANES) * bh * 2);

  for (b = 0; b < bw * bh; b++) {
    /* Raster position of this 32x32 block inside the larger block. */
    const int x_idx = b & (bw - 1), y_idx = b >> bwl;
    cost += cost_coeffs(cm, x, b * 64, PLANE_TYPE_Y_WITH_DC,
                        (ENTROPY_CONTEXT *) &t_above[x_idx * 2],
                        (ENTROPY_CONTEXT *) &t_left[y_idx * 2],
                        TX_32X32, bw * bh * 64);
  }

  return cost;
}

827
/* Evaluates the luma plane of a bsize block with the 32x32 transform.
 *
 * Precondition: bsize >= BLOCK_SIZE_SB32X32.  The coefficient count is
 * computed with shift amounts of the form "mb_*_log2(bsize) - 1", so a
 * smaller block size would produce a negative shift (undefined behaviour).
 *
 * Sets the current mode info's txfm_size to TX_32X32, calls
 * vp9_transform_sby_32x32() and vp9_quantize_sby_32x32(), then writes:
 *   *distortion - vp9_sb_block_error_c() between x->coeff and the plane-0
 *                 dqcoeff buffer,
 *   *rate       - rdcost_sby_32x32(),
 *   *skippable  - vp9_sby_is_skippable().
 */
static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
                                  int *rate, int *distortion, int *skippable,
                                  BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int bwl, bhl;

  /* Must hold before the shift below is evaluated. */
  assert(bsize >= BLOCK_SIZE_SB32X32);

  /* log2 of the block dimensions counted in 32x32 units. */
  bwl = mb_width_log2(bsize) - 1;
  bhl = mb_height_log2(bsize) - 1;

  xd->mode_info_context->mbmi.txfm_size = TX_32X32;
  vp9_transform_sby_32x32(x, bsize);
  vp9_quantize_sby_32x32(x, bsize);

  /* 1024 coefficients per 32x32 block.
   * NOTE(review): the last argument is 0 here but 2 for the smaller
   * transform sizes; presumably it compensates for different transform
   * scaling - confirm against vp9_sb_block_error_c(). */
  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff,
                                     1024 << (bwl + bhl), 0);
  *rate       = rdcost_sby_32x32(cm, x, bsize);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

843
844
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
845
                            int *skip, BLOCK_SIZE_TYPE bs,
846
                            int64_t txfm_cache[NB_TXFM_MODES]) {
847
  VP9_COMMON *const cm = &cpi->common;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
848
  MACROBLOCKD *const xd = &x->e_mbd;
849
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
850
851
  uint8_t *src = x->src.y_buffer, *dst = xd->plane[0].dst.buf;
  int src_y_stride = x->src.y_stride, dst_y_stride = xd->plane[0].dst.stride;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
852

853
  vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
854

855
856
857
858
859
860
  if (bs >= BLOCK_SIZE_SB32X32)
    super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                          bs);
  super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], bs);
  super_block_yrd_8x8(cm, x,   &r[TX_8X8][0],   &d[TX_8X8],   &s[TX_8X8],   bs);
  super_block_yrd_4x4(cm, x,   &r[TX_4X4][0],   &d[TX_4X4],   &s[TX_4X4],   bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
861
862

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
863
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
864
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
865

866
867
/* Tries every 4x4 intra prediction mode for block ib and keeps the one with
 * the lowest RDCOST.
 *
 * For each candidate mode the block is predicted, the residual is
 * transformed and quantized, and rate (bmode_costs[] entry plus
 * cost_coeffs()) and distortion are combined via RDCOST.
 *
 * Outputs for the winning mode:
 *   *best_mode                         - the mode itself,
 *   *bestrate, *bestratey              - total rate and coefficient-only rate,
 *   *bestdistortion                    - its distortion,
 *   *a, *l                             - entropy contexts after coding it.
 * b->bmi.as_mode.first is left set to the winning mode and the destination
 * buffer holds its reconstruction.  Returns the winning RD cost.
 */
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK *be = x->block + ib;
  BLOCKD *b = xd->block + ib;
  /* Contexts on entry; every candidate mode starts from these. */
  const ENTROPY_CONTEXT above_in = *a;
  const ENTROPY_CONTEXT left_in = *l;
  TX_TYPE best_tx_type = DCT_DCT;
  int64_t best_rd = INT64_MAX;
  B_PREDICTION_MODE mode;
  /* Dequantized coefficients (16 x int16_t) of the best mode found so far. */
  DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);

  assert(ib < 16);
#if CONFIG_NEWBINTRAMODES
  b->bmi.as_mode.context = vp9_find_bpred_context(xd, b);
#endif
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;

  for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
    ENTROPY_CONTEXT above_ctx = above_in;
    ENTROPY_CONTEXT left_ctx = left_in;
    TX_TYPE tx_type;
    int64_t this_rd;
    int rate, ratey, distortion;

#if CONFIG_NEWBINTRAMODES
    /* Key frames skip B_CONTEXT_PRED; other frames skip the modes that
     * B_CONTEXT_PRED replaces. */
    if (xd->frame_type == KEY_FRAME) {
      if (mode == B_CONTEXT_PRED) continue;
    } else if (mode >= B_CONTEXT_PRED - CONTEXT_PRED_REPLACEMENTS &&
               mode < B_CONTEXT_PRED) {
      continue;
    }
#endif

    b->bmi.as_mode.first = mode;
#if CONFIG_NEWBINTRAMODES
    rate = bmode_costs[
        mode == B_CONTEXT_PRED ? mode - CONTEXT_PRED_REPLACEMENTS : mode];
#else
    rate = bmode_costs[mode];
#endif

    /* Predict into the destination buffer and form the residual. */
    vp9_intra4x4_predict(xd, b, mode, *(b->base_dst) + b->dst, b->dst_stride);
    vp9_subtract_b(be, b, 16);

    /* Forward transform + quantization; the transform type is looked up
     * per block once the mode has been stored. */
    b->bmi.as_mode.first = mode;
    tx_type = get_tx_type_4x4(xd, ib);
    if (tx_type == DCT_DCT) {
      x->fwd_txm4x4(be->src_diff, be->coeff, 32);
      x->quantize_b_4x4(x, ib, 16);
    } else {
      vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
      vp9_ht_quantize_b_4x4(x, ib, tx_type);
    }

    ratey = cost_coeffs(cm, x, ib, PLANE_TYPE_Y_WITH_DC,
                        &above_ctx, &left_ctx, TX_4X4, 16);
    rate += ratey;
    distortion = vp9_block_error(be->coeff,
                                 BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
                                 16) >> 2;

    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    if (this_rd >= best_rd)
      continue;

    /* New best: record its statistics, contexts and coefficients. */
    best_rd = this_rd;
    best_tx_type = tx_type;
    *best_mode = mode;
    *bestrate = rate;
    *bestratey = ratey;
    *bestdistortion = distortion;
    *a = above_ctx;
    *l = left_ctx;
    vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32);
  }
  b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode);

  /* Inverse transform the saved coefficients of the winning mode. */
  if (best_tx_type == DCT_DCT)
    xd->inv_txm4x4(best_dqcoeff, b->diff, 32);
  else
    vp9_short_iht4x4(best_dqcoeff, b->diff, 16, best_tx_type);

  /* Redo the winning prediction (the buffer holds the last mode tried) and
   * add the residual to reconstruct the block in place. */
  vp9_intra4x4_predict(xd, b, *best_mode,
                       *(b->base_dst) + b->dst, b->dst_stride);
  vp9_recon_b(*(b->base_dst) + b->dst, b->diff,
              *(b->base_dst) + b->dst, b->dst_stride);

  return best_rd;
}

972
973
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
974
                                         int *Distortion, int64_t best_rd) {
John Koleszar's avatar
John Koleszar committed
975
976
  int i;
  MACROBLOCKD *const xd = &mb->e_mbd;
Yaowu Xu's avatar
Yaowu Xu committed
977
  int cost = mb->mbmode_cost[xd->frame_type][I4X4_PRED];
John Koleszar's avatar
John Koleszar committed
978
979
980
981
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
982
  ENTROPY_CONTEXT *ta, *tl;
John Koleszar's avatar
John Koleszar committed
983
984
  int *bmode_costs;

985
986
987
988
  vpx_memcpy(&t_above, xd->above_context,
             sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, xd->left_context,
             sizeof(ENTROPY_CONTEXT_PLANES));