vp9_rdopt.c 117 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
12
13
14
15
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
50
51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
57
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
58

59
60
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
61

62
63
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65
66
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
67

68
69
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
70

71
72
73
74
75
76
77
78
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
79

80
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
84
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
85

86
87
88
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
89

Ronald S. Bultje's avatar
Ronald S. Bultje committed
90
  {I4X4_PRED, INTRA_FRAME,  NONE},
91

John Koleszar's avatar
John Koleszar committed
92
  /* compound prediction modes */
Ronald S. Bultje's avatar
Ronald S. Bultje committed
93
94
95
  {ZEROMV,    LAST_FRAME,   ALTREF_FRAME},
  {NEARESTMV, LAST_FRAME,   ALTREF_FRAME},
  {NEARMV,    LAST_FRAME,   ALTREF_FRAME},
96

John Koleszar's avatar
John Koleszar committed
97
98
99
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
100

Ronald S. Bultje's avatar
Ronald S. Bultje committed
101
  {NEWMV,     LAST_FRAME,   ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
102
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
103

Ronald S. Bultje's avatar
Ronald S. Bultje committed
104
  {SPLITMV,   LAST_FRAME,   ALTREF_FRAME},
105
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
106
107
};

108
109
110
111
112
113
114
115
116
117
118
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

119
120
121
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
                             vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
122
  int i, j, k, l;
123
124
125
126
127
128
129
130
131
132
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; t++)
    for (i = 0; i < BLOCK_TYPES; i++)
      for (j = 0; j < REF_TYPES; j++)
        for (k = 0; k < COEF_BANDS; k++)
          for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
            vp9_prob probs[ENTROPY_NODES];
            vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs,
                            vp9_coef_tree);
133
#if CONFIG_BALANCED_COEFTREE
134
135
136
137
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            probs[1] = 1;
            vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree);
138
#else
139
140
            vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs,
                                 vp9_coef_tree);
141
142
143
            assert(c[t][i][j][k][l][DCT_EOB_TOKEN] ==
                   cnoskip[t][i][j][k][l][DCT_EOB_TOKEN]);
#endif
144
          }
145
146
}

147
148
149
150
static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, };
John Koleszar's avatar
John Koleszar committed
151

152
// 3* dc_qlookup[Q]*dc_qlookup[Q];
153

154
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
155
156
157
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

158
void vp9_init_me_luts() {
John Koleszar's avatar
John Koleszar committed
159
160
161
162
163
164
165
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    sad_per_bit16lut[i] =
166
      (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
167
    sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
John Koleszar's avatar
John Koleszar committed
168
  }
Paul Wilkins's avatar
Paul Wilkins committed
169
}
John Koleszar's avatar
John Koleszar committed
170

171
static int compute_rd_mult(int qindex) {
172
  const int q = vp9_dc_quant(qindex, 0);
173
  return (11 * q * q) >> 2;
174
175
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
176
177
178
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
John Koleszar's avatar
John Koleszar committed
179
180
}

181

Dmitry Kovalev's avatar
Dmitry Kovalev committed
182
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
183
  int q, i, bsize;
John Koleszar's avatar
John Koleszar committed
184

185
  vp9_clear_system_state();  // __asm emms;
John Koleszar's avatar
John Koleszar committed
186

John Koleszar's avatar
John Koleszar committed
187
188
189
190
  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
191
  qindex = clamp(qindex, 0, MAXQ);
192

Dmitry Kovalev's avatar
Dmitry Kovalev committed
193
  cpi->RDMULT = compute_rd_mult(qindex);
John Koleszar's avatar
John Koleszar committed
194
195
196
197
198
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
Dmitry Kovalev's avatar
Dmitry Kovalev committed
199
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
John Koleszar's avatar
John Koleszar committed
200
  }
201
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
John Koleszar's avatar
John Koleszar committed
202
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
203

204
  vp9_set_speed_features(cpi);
John Koleszar's avatar
John Koleszar committed
205

Dmitry Kovalev's avatar
Dmitry Kovalev committed
206
207
  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
John Koleszar's avatar
John Koleszar committed
208
209
  if (q < 8)
    q = 8;
210

John Koleszar's avatar
John Koleszar committed
211
212
213
  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
John Koleszar's avatar
John Koleszar committed
214

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
    for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
      for (i = 0; i < MAX_MODES; ++i) {
        // Threshold here seem unecessarily harsh but fine given actual
        // range of values used for cpi->sf.thresh_mult[]
        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

        // *4 relates to the scaling of rd_thresh_block_size_factor[]
        if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q *
            rd_thresh_block_size_factor[bsize] / (4 * 100);
        } else {
          cpi->rd_threshes[bsize][i] = INT_MAX;
        }
        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
John Koleszar's avatar
John Koleszar committed
231
      }
John Koleszar's avatar
John Koleszar committed
232
    }
John Koleszar's avatar
John Koleszar committed
233
234
  } else {
    cpi->RDDIV = 100;
John Koleszar's avatar
John Koleszar committed
235

236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
    for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
      for (i = 0; i < MAX_MODES; i++) {
        // Threshold here seem unecessarily harsh but fine given actual
        // range of values used for cpi->sf.thresh_mult[]
        int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);

        if (cpi->sf.thresh_mult[i] < thresh_max) {
          cpi->rd_threshes[bsize][i] =
            cpi->sf.thresh_mult[i] * q *
            rd_thresh_block_size_factor[bsize] / 4;
        } else {
          cpi->rd_threshes[bsize][i] = INT_MAX;
        }
        cpi->rd_baseline_thresh[bsize][i] = cpi->rd_threshes[bsize][i];
        cpi->rd_thresh_freq_fact[bsize][i] = BASE_RD_THRESH_FREQ_FACT;
John Koleszar's avatar
John Koleszar committed
251
      }
John Koleszar's avatar
John Koleszar committed
252
    }
John Koleszar's avatar
John Koleszar committed
253
  }
John Koleszar's avatar
John Koleszar committed
254

255
256
257
  fill_token_costs(cpi->mb.token_costs,
                   cpi->mb.token_costs_noskip,
                   cpi->common.fc.coef_probs);
258

259
  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
260
    vp9_cost_tokens(cpi->mb.partition_cost[i],
261
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
262
263
                    vp9_partition_tree);

John Koleszar's avatar
John Koleszar committed
264
  /*rough estimate for costing*/
265
  vp9_init_mode_costs(cpi);
John Koleszar's avatar
John Koleszar committed
266

267
  if (cpi->common.frame_type != KEY_FRAME) {
268
    vp9_build_nmv_cost_table(
269
270
271
272
273
274
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
John Koleszar's avatar
John Koleszar committed
275
276
}

277
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
278
  int i, error = 0;
John Koleszar's avatar
John Koleszar committed
279

280
  for (i = 0; i < block_size; i++) {
John Koleszar's avatar
John Koleszar committed
281
282
283
    int this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }
John Koleszar's avatar
John Koleszar committed
284

John Koleszar's avatar
John Koleszar committed
285
  return error;
John Koleszar's avatar
John Koleszar committed
286
287
}

288
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
289
                              int plane, int block, PLANE_TYPE type,
290
291
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
292
293
                              TX_SIZE tx_size,
                              int y_blocks) {
294
  MACROBLOCKD *const xd = &mb->e_mbd;
295
296
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
297
  int c = 0;
298
299
  int cost = 0, pad;
  const int *scan, *nb;
300
301
302
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
303
  const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
304
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
305
      mb->token_costs[tx_size][type][ref];
306
  ENTROPY_CONTEXT above_ec, left_ec;
307
  TX_TYPE tx_type = DCT_DCT;
308

309
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
310
311
312
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];

313
314
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
315
  const uint8_t * band_translate;
316
317

  // Check for consistency of tx_size with mode info
318
  assert((!type && !plane) || (type && plane));
319
320
321
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
322
    TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
323
324
325
    assert(tx_size == tx_size_uv);
  }

326
  switch (tx_size) {
327
    case TX_4X4: {
328
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
329
          get_tx_type_4x4(xd, block) : DCT_DCT;
330
331
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
332
      seg_eob = 16;
333
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
334
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
335
      break;
336
    }
337
338
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
339
      const int sz = 1 + b_width_log2(sb_type);
340
      const int x = block & ((1 << sz) - 1), y = block - x;
341
342
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
343
344
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
345
      scan = get_scan_8x8(tx_type);
346
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
347
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
348
      break;
349
350
351
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
352
      const int sz = 2 + b_width_log2(sb_type);
353
      const int x = block & ((1 << sz) - 1), y = block - x;
354
355
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
356
      scan = get_scan_16x16(tx_type);
357
      seg_eob = 256;
358
359
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
360
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
361
      break;
362
    }
363
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
364
      scan = vp9_default_scan_32x32;
365
      seg_eob = 1024;
366
367
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
368
      band_translate = vp9_coefband_trans_8x8plus;
369
      break;
Daniel Kang's avatar
Daniel Kang committed
370
    default:
371
      abort();
Daniel Kang's avatar
Daniel Kang committed
372
373
      break;
  }
John Koleszar's avatar
John Koleszar committed
374
  assert(eob <= seg_eob);
375

376
  pt = combine_entropy_contexts(above_ec, left_ec);
377
378
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
379

380
381
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
382

383
384
385
386
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

387
  {
388
    for (c = 0; c < eob; c++) {
389
      int v = qcoeff_ptr[scan[c]];
390
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
391
      int band = get_coef_band(band_translate, c);
392
393
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
394

395
396
397
398
      if (!c || token_cache[scan[c - 1]])  // do not skip eob
        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
      else
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
399
      token_cache[scan[c]] = vp9_pt_energy_class[t];
400
    }
401
402
403
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
404
405
406
      cost += mb->token_costs_noskip[tx_size][type][ref]
          [get_coef_band(band_translate, c)]
          [pt][DCT_EOB_TOKEN];
407
    }
408
409
  }

410
411
412
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
413
  }
414

415
416
417
  return cost;
}

418
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
419
420
421
422
423
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
424
425
426
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
427
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
428
429
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
430
  int s0, s1;
431

432
  const vp9_prob *tx_probs = vp9_get_pred_probs(cm, xd, PRED_TX_SIZE);
433

434
435
436
437
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
438
        r[n][1] += vp9_cost_zero(tx_probs[m]);
439
      else
440
        r[n][1] += vp9_cost_one(tx_probs[m]);
441
442
    }
  }
443

444
445
446
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
447

448
449
450
451
452
453
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
454
455
456
    }
  }

457
458
459
460
461
462
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
463
464
465
466
467
468
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
469
    mbmi->txfm_size = TX_16X16;
470
  } else if (cm->txfm_mode == ALLOW_8X8 ||
471
472
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
473
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
474
475
476
477
478
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

479
  *distortion = d[mbmi->txfm_size];
480
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
481
482
  *skip       = s[mbmi->txfm_size];

483
484
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
485
486
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
487
488
489
490
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
491
492
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
493
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
494
  else
495
496
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
497
498
}

499
500
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
501
502
503
504
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
505
    int this_diff = coeff[i] - dqcoeff[i];
506
    error += (unsigned)this_diff * this_diff;
507
  }
508
  error >>= shift;
509

Frank Galligan's avatar
Frank Galligan committed
510
  return error > INT_MAX ? INT_MAX : (int)error;
511
512
}

513
514
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
515
  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
516
                     16 << (bwl + bhl), shift);
517
}
518

519
520
521
522
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  int64_t sum = 0;
  int plane;
523

524
525
526
527
528
529
530
531
  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
                            x->e_mbd.plane[plane].subsampling_y;
    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                       16 << (bwl + bhl - subsampling), 0);
  }
  sum >>= shift;
  return sum > INT_MAX ? INT_MAX : (int)sum;
532
533
}

534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
struct rdcost_block_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  TX_SIZE tx_size;
  int bw;
  int bh;
  int cost;
};

static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                         int ss_txfrm_size, void *arg) {
  struct rdcost_block_args* args = arg;
  int x_idx, y_idx;
  MACROBLOCKD * const xd = &args->x->e_mbd;

  txfrm_block_to_raster_xy(xd, bsize, plane, block, args->tx_size * 2, &x_idx,
                           &y_idx);

  args->cost += cost_coeffs(args->cm, args->x, plane, block,
                            xd->plane[plane].plane_type, args->t_above + x_idx,
                            args->t_left + y_idx, args->tx_size,
                            args->bw * args->bh);
}

static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD * const xd = &x->e_mbd;
563
564
565
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
566
  struct rdcost_block_args args = { cm, x, { 0 }, { 0 }, tx_size, bw, bh, 0 };
567

568
  vpx_memcpy(&args.t_above, xd->plane[plane].above_context,
569
             sizeof(ENTROPY_CONTEXT) * bw);
570
  vpx_memcpy(&args.t_left, xd->plane[plane].left_context,
571
             sizeof(ENTROPY_CONTEXT) * bh);
572

573
  foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &args);
574

575
  return args.cost;
576
577
}

578
579
580
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int cost = 0, plane;
581

582
583
  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    cost += rdcost_plane(cm, x, plane, bsize, tx_size);
584
585
  }
  return cost;
586
587
}

588
589
590
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
591
  MACROBLOCKD *const xd = &x->e_mbd;
592
  xd->mode_info_context->mbmi.txfm_size = tx_size;
593

Ronald S. Bultje's avatar
Ronald S. Bultje committed
594
  if (xd->mode_info_context->mbmi.ref_frame[0] == INTRA_FRAME)
595
596
597
    vp9_encode_intra_block_y(cm, x, bsize);
  else
    vp9_xform_quant_sby(cm, x, bsize);
598

599
600
  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
601
  *skippable  = vp9_sby_is_skippable(xd, bsize);
602
603
}

604
605
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
606
                            int *skip, BLOCK_SIZE_TYPE bs,
607
                            int64_t txfm_cache[NB_TXFM_MODES]) {
608
609
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Jim Bankoski's avatar
Jim Bankoski committed
610
611
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
612

613
  assert(bs == mbmi->sb_type);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
614
  if (mbmi->ref_frame[0] > INTRA_FRAME)
615
    vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
616

Jim Bankoski's avatar
Jim Bankoski committed
617
618
619
620
621
622
623
624
625
626
  if (cpi->speed > 4) {
    if (bs >= BLOCK_SIZE_SB32X32) {
      mbmi->txfm_size = TX_32X32;
    } else if (bs >= BLOCK_SIZE_MB16X16) {
      mbmi->txfm_size = TX_16X16;
    } else if (bs >= BLOCK_SIZE_SB8X8) {
      mbmi->txfm_size = TX_8X8;
    } else {
      mbmi->txfm_size = TX_4X4;
    }
627
    vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
Jim Bankoski's avatar
Jim Bankoski committed
628
629
630
631
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
                             mbmi->txfm_size);
    return;
  }
632
  if (bs >= BLOCK_SIZE_SB32X32)
633
634
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
635
  if (bs >= BLOCK_SIZE_MB16X16)
636
637
638
639
640
641
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
642

643
  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
644
                           skip, txfm_cache,
645
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
646
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
647
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
648

649
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
Yaowu Xu's avatar
Yaowu Xu committed
650
                                     MB_PREDICTION_MODE *best_mode,
651
652
653
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
654
655
656
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
657
  MACROBLOCKD *xd = &x->e_mbd;
658
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
659
660
  int rate = 0;
  int distortion;
661
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
662
  const int src_stride = x->plane[0].src.stride;
663
664
665
666
667
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Deb Mukherjee's avatar
Deb Mukherjee committed
668
669
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
670
671
672
673
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
John Koleszar's avatar
John Koleszar committed
674

Jingning Han's avatar
Jingning Han committed
675
  assert(ib < 4);
676

677
678
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
679
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
680
681

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
682
    int64_t this_rd;
683
    int ratey = 0;
684

685
    rate = bmode_costs[mode];
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
703
704
        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
                             dst, xd->plane[0].dst.stride);
705
706
707
708
709
710
711
712
713
714
715
716
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }
John Koleszar's avatar
John Koleszar committed
717

718
719
720
721
        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;
John Koleszar's avatar
John Koleszar committed
722

723
724
725
726
727
728
729
730
        if (best_tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, best_tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }
Jingning Han's avatar
Jingning Han committed
731

732
733
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
734

735
736
737
738
739
740
741
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
742
743
744
745
746
747
748
749
750
751
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
John Koleszar's avatar
John Koleszar committed
752
    }
John Koleszar's avatar
John Koleszar committed
753
  }
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
                           dst, xd->plane[0].dst.stride);
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
Scott LaVarnway's avatar
Scott LaVarnway committed
773
  }
John Koleszar's avatar
John Koleszar committed
774

John Koleszar's avatar
John Koleszar committed
775
  return best_rd;
John Koleszar's avatar
John Koleszar committed
776
777
}

778
779
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
780
                                         int *Distortion, int64_t best_rd) {
781
  int i, j;
John Koleszar's avatar
John Koleszar committed
782
  MACROBLOCKD *const xd = &mb->e_mbd;
783
784
785
786
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
787
  int cost = 0;
John Koleszar's avatar
John Koleszar committed
788
789
790
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
791
  ENTROPY_CONTEXT t_above[4], t_left[4];
John Koleszar's avatar
John Koleszar committed
792
  int *bmode_costs;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
793
  MODE_INFO *const mic = xd->mode_info_context;
John Koleszar's avatar
John Koleszar committed
794

795
796
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
John Koleszar's avatar
John Koleszar committed
797

798
  bmode_costs = mb->mbmode_cost;
799

800
801
802
  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      const int mis = xd->mode_info_stride;
Yaowu Xu's avatar
Yaowu Xu committed
803
      MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
804
805
806
807
808
809
810
811
812
      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
      int UNINITIALIZED_IS_SAFE(d);
      i = idy * 2 + idx;

      if (xd->frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, i) : DC_PRED;

813
        bmode_costs  = mb->y_mode_costs[A][L];
814
      }
815

816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
      total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                        t_above + idx, t_left + idy,
                                        &r, &ry, &d, bsize);
      cost += r;
      distortion += d;
      tot_rate_y += ry;

      mic->bmi[i].as_mode.first = best_mode;
      for (j = 1; j < bh; ++j)
        mic->bmi[i + j * 2].as_mode.first = best_mode;
      for (j = 1; j < bw; ++j)
        mic->bmi[i + j].as_mode.first = best_mode;

      if (total_rd >= best_rd)
        break;
John Koleszar's avatar
John Koleszar committed
831
832
    }
  }
John Koleszar's avatar
John Koleszar committed
833

834
  if (total_rd >= best_rd)
835
    return INT64_MAX;
836

John Koleszar's avatar
John Koleszar committed
837
  *Rate = cost;
838
  *rate_y = tot_rate_y;
John Koleszar's avatar
John Koleszar committed
839
  *Distortion = distortion;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
840
  xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;
John Koleszar's avatar
John Koleszar committed
841

John Koleszar's avatar
John Koleszar committed
842
  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
John Koleszar's avatar
John Koleszar committed
843
}
844

845
846
847
848
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int *distortion, int *skippable,
                                      BLOCK_SIZE_TYPE bsize,
849
                                      int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
850
851
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
Jim Bankoski's avatar
Jim Bankoski committed
852
  MACROBLOCKD *const xd = &x->e_mbd;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
853
  int this_rate, this_rate_tokenonly;
854
  int this_distortion, s;
855
  int64_t best_rd = INT64_MAX, this_rd;
856
857
  TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
  int i;
858
  int *bmode_costs = x->mbmode_cost;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
859

860
861
862
863
864
  if (bsize < BLOCK_SIZE_SB8X8) {
    x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
    return best_rd;
  }

865
866
  for (i = 0; i < NB_TXFM_MODES; i++)
    txfm_cache[i] = INT64_MAX;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
867
868
869

  /* Y Search for 32x32 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
870
    int64_t local_txfm_cache[NB_TXFM_MODES];
871
872
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;
873

874
875
876
877
878
879
880
    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
      const MB_PREDICTION_MODE L = xd->left_available ?
                                   left_block_mode(mic, 0) : DC_PRED;

      bmode_costs = x->y_mode_costs[A][L];
    }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
881
882
    x->e_mbd.mode_info_context->mbmi.mode = mode;

883
884
    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
                    bsize, local_txfm_cache);
Jim Bankoski's avatar
Jim Bankoski committed
885

886
    this_rate = this_rate_tokenonly + bmode_costs[mode];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
887
888
889
890
891
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
892
      best_tx         = x->e_mbd.mode_info_context->mbmi.txfm_size;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
893
894
895
896
897
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
    }
898

899
900
    for (i = 0; i < NB_TXFM_MODES; i++) {
      int64_t adj_rd = this_rd + local_txfm_cache[i] -
901
                       local_txfm_cache[cpi->common.txfm_mode];
902
903
      if (adj_rd < txfm_cache[i]) {
        txfm_cache[i] = adj_rd;
John Koleszar's avatar
John Koleszar committed
904
      }
John Koleszar's avatar
John Koleszar committed
905
    }
John Koleszar's avatar
John Koleszar committed
906
  }
John Koleszar's avatar
John Koleszar committed
907

908
909
  x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
  x->