/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
50
51
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

Ronald S. Bultje's avatar
Ronald S. Bultje committed
52
#define I4X4_PRED 0x8000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
53
#define SPLITMV 0x10000
Ronald S. Bultje's avatar
Ronald S. Bultje committed
54

55
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
56
57
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
58

59
60
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
61

62
63
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65
66
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
67

68
69
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
70

71
72
73
74
75
76
77
78
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
79

80
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
84
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
85

86
87
88
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
89

Ronald S. Bultje's avatar
Ronald S. Bultje committed
90
  {I4X4_PRED, INTRA_FRAME,  NONE},
91

John Koleszar's avatar
John Koleszar committed
92
93
94
95
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
96

John Koleszar's avatar
John Koleszar committed
97
98
99
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
100

John Koleszar's avatar
John Koleszar committed
101
102
103
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
104

John Koleszar's avatar
John Koleszar committed
105
106
107
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
108

John Koleszar's avatar
John Koleszar committed
109
110
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
111
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
112
113
};

114
115
116
117
118
119
120
121
122
123
124
// The baseline RD thresholds for breaking out of the RD loop for certain
// modes are assumed to be based on 8x8 blocks. This table corrects them for
// the actual block size. Entries are scaled by 4 (2 = x0.5, 32 = x8, etc.),
// so code applying a factor divides the product by 4.
// The table is only read, hence const.
static const int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};

#define BASE_RD_THRESH_FREQ_FACT 16
#define MAX_RD_THRESH_FREQ_FACT 32
#define MAX_RD_THRESH_FREQ_INC 1

125
126
127
// Builds the coefficient token cost tables from the model probabilities in p.
// For every transform size, block type, reference type, coefficient band and
// previous-coefficient context, the model is expanded to full probabilities
// and two cost vectors are written: cnoskip gets the costs over the whole
// coefficient tree, c gets the variant in which the EOB node is not charged.
static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
                             vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
                             vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
  TX_SIZE tx;

  for (tx = TX_4X4; tx <= TX_32X32; tx++) {
    int type;

    for (type = 0; type < BLOCK_TYPES; type++) {
      int ref;

      for (ref = 0; ref < REF_TYPES; ref++) {
        int band;

        for (band = 0; band < COEF_BANDS; band++) {
          int ctx;

          for (ctx = 0; ctx < PREV_COEF_CONTEXTS; ctx++) {
            vp9_prob full_probs[ENTROPY_NODES];

            vp9_model_to_full_probs(p[tx][type][ref][band][ctx], full_probs);
            vp9_cost_tokens((int *)cnoskip[tx][type][ref][band][ctx],
                            full_probs, vp9_coef_tree);
#if CONFIG_BALANCED_COEFTREE
            // Replace the eob node prob with a very small value so that the
            // cost approximately equals the cost without the eob node
            full_probs[1] = 1;
            vp9_cost_tokens((int *)c[tx][type][ref][band][ctx], full_probs,
                            vp9_coef_tree);
#else
            vp9_cost_tokens_skip((int *)c[tx][type][ref][band][ctx],
                                 full_probs, vp9_coef_tree);
            // Both tables must agree on the cost of the EOB token itself.
            assert(c[tx][type][ref][band][ctx][DCT_EOB_TOKEN] ==
                   cnoskip[tx][type][ref][band][ctx][DCT_EOB_TOKEN]);
#endif
          }
        }
      }
    }
  }
}

153
154
155
156
// Adjustment factors applied to RDMULT (in units of 1/16), indexed by
// next_iiratio clamped to 31. Only the first five entries are non-zero; the
// rest are zero-initialized. The table is only read, hence const.
static const int rd_iifactor[32] = { 4, 4, 3, 2, 1 };
John Koleszar's avatar
John Koleszar committed
157

158
// 3* dc_qlookup[Q]*dc_qlookup[Q];
159

160
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
161
162
163
// SAD-per-bit lookup tables, one entry per quantizer index. They are filled
// by vp9_init_me_luts() and read by vp9_initialize_me_consts().
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

// Initializes the SAD lookup tables using a formulaic calculation for now.
// This is to make it easier to resolve the impact of experimental changes
// to the quantizer tables.
void vp9_init_me_luts(void) {
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; qindex++) {
    // Convert once per index; both tables are linear in the same q value.
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[qindex] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
176

177
// Returns the rate-distortion multiplier for a quantizer index:
// 11/4 times the square of the DC quantizer (integer arithmetic).
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int scaled = 11 * dc_q * dc_q;

  return scaled >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
182
183
184
// Copies the SAD-per-bit values for the given quantizer index from the
// lookup tables into the encoder's macroblock state. qindex is used as a
// table index without range checking.
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  const int per_bit16 = sad_per_bit16lut[qindex];
  const int per_bit4 = sad_per_bit4lut[qindex];

  cpi->mb.sadperbit16 = per_bit16;
  cpi->mb.sadperbit4 = per_bit4;
}

187

Dmitry Kovalev's avatar
Dmitry Kovalev committed
188
// Fills cpi->rd_threshes, cpi->rd_baseline_thresh and
// cpi->rd_thresh_freq_fact for every block size and mode.
// The threshold is thresh_mult * q * block-size factor / divisor, saturated
// to INT_MAX when the product would not fit in an int.
static void set_rd_thresholds(VP9_COMP *cpi, int q, int divisor) {
  int bsize, mode;

  for (bsize = 0; bsize < BLOCK_SIZE_TYPES; ++bsize) {
    const int size_factor = rd_thresh_block_size_factor[bsize];
    // Threshold here seems unnecessarily harsh but fine given the actual
    // range of values used for cpi->sf.thresh_mult[]
    const int thresh_max = INT_MAX / (q * size_factor);

    for (mode = 0; mode < MAX_MODES; ++mode) {
      if (cpi->sf.thresh_mult[mode] < thresh_max) {
        cpi->rd_threshes[bsize][mode] =
            cpi->sf.thresh_mult[mode] * q * size_factor / divisor;
      } else {
        cpi->rd_threshes[bsize][mode] = INT_MAX;
      }
      cpi->rd_baseline_thresh[bsize][mode] = cpi->rd_threshes[bsize][mode];
      cpi->rd_thresh_freq_fact[bsize][mode] = BASE_RD_THRESH_FREQ_FACT;
    }
  }
}

// Initializes the rate-distortion constants for a frame coded at qindex:
// RDMULT/RDDIV, the error-per-bit value, the per-mode RD thresholds, and the
// token, partition, mode and (for non-key frames) motion vector cost tables.
// qindex is clamped to [0, MAXQ] before use.
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // rd_iifactor has 32 entries; ratios above 31 use the last one.
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }

  // Never let the error-per-bit value drop to zero.
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    // Large multipliers are rescaled by 100; the thresholds are divided by
    // the same 100 on top of the x4 scaling of
    // rd_thresh_block_size_factor[].
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;
    set_rd_thresholds(cpi, q, 4 * 100);
  } else {
    cpi->RDDIV = 100;
    set_rd_thresholds(cpi, q, 4);
  }

  fill_token_costs(cpi->mb.token_costs,
                   cpi->mb.token_costs_noskip,
                   cpi->common.fc.coef_probs);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[cpi->common.frame_type][i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

283
// Returns the sum of squared differences between the first block_size
// entries of coeff and dqcoeff.
//
// The sum is accumulated in 64 bits and saturated to INT_MAX on return, the
// same way block_error() in this file does: a single int16_t difference can
// reach 65535, whose square does not fit in an int, so accumulating in int
// would overflow (undefined behaviour).
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int this_diff = coeff[i] - dqcoeff[i];
    error += (int64_t)this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

294
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
295
                              int plane, int block, PLANE_TYPE type,
296
297
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
298
299
                              TX_SIZE tx_size,
                              int y_blocks) {
300
  MACROBLOCKD *const xd = &mb->e_mbd;
301
302
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
303
  int c = 0;
304
305
  int cost = 0, pad;
  const int *scan, *nb;
306
307
308
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
309
  const int ref = mbmi->ref_frame != INTRA_FRAME;
310
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
311
      mb->token_costs[tx_size][type][ref];
312
  ENTROPY_CONTEXT above_ec, left_ec;
313
  TX_TYPE tx_type = DCT_DCT;
314

315
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
316
317
318
  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
      mb->token_costs_noskip[tx_size][type][ref];

319
320
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
321
  const uint8_t * band_translate;
322
323

  // Check for consistency of tx_size with mode info
324
  assert((!type && !plane) || (type && plane));
325
326
327
328
329
330
331
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

332
  switch (tx_size) {
333
    case TX_4X4: {
334
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
335
          get_tx_type_4x4(xd, block) : DCT_DCT;
336
337
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
338
      seg_eob = 16;
339
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
340
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
341
      break;
342
    }
343
344
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
345
      const int sz = 1 + b_width_log2(sb_type);
346
      const int x = block & ((1 << sz) - 1), y = block - x;
347
348
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
349
350
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
351
      scan = get_scan_8x8(tx_type);
352
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
353
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
354
      break;
355
356
357
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
358
      const int sz = 2 + b_width_log2(sb_type);
359
      const int x = block & ((1 << sz) - 1), y = block - x;
360
361
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
362
      scan = get_scan_16x16(tx_type);
363
      seg_eob = 256;
364
365
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
366
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
367
      break;
368
    }
369
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
370
      scan = vp9_default_scan_32x32;
371
      seg_eob = 1024;
372
373
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
374
      band_translate = vp9_coefband_trans_8x8plus;
375
      break;
Daniel Kang's avatar
Daniel Kang committed
376
    default:
377
      abort();
Daniel Kang's avatar
Daniel Kang committed
378
379
      break;
  }
John Koleszar's avatar
John Koleszar committed
380
  assert(eob <= seg_eob);
381

382
  pt = combine_entropy_contexts(above_ec, left_ec);
383
384
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
385

386
387
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
388

389
390
391
392
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

393
  {
394
    for (c = 0; c < eob; c++) {
395
      int v = qcoeff_ptr[scan[c]];
396
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
397
      int band = get_coef_band(band_translate, c);
398
399
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
400

401
402
403
404
      if (!c || token_cache[scan[c - 1]])  // do not skip eob
        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
      else
        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
405
      token_cache[scan[c]] = vp9_pt_energy_class[t];
406
    }
407
408
409
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
410
411
412
      cost += mb->token_costs_noskip[tx_size][type][ref]
          [get_coef_band(band_translate, c)]
          [pt][DCT_EOB_TOKEN];
413
    }
414
415
  }

416
417
418
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
419
  }
420

421
422
423
  return cost;
}

424
// Picks the transform size for the current block from per-size results.
//
// r[n][0], d[n] and s[n] hold the rate, distortion and skip flag measured
// for transform size n (TX_4X4 .. max_txfm_size). This function fills
// r[n][1] with the rate including the transform-size signalling bits, sets
// mbmi->txfm_size according to cm->txfm_mode, returns the chosen size's
// rate/distortion/skip through rate/distortion/skip, and stores the RD cost
// of each transform mode in txfm_cache.
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  const int select = cm->txfm_mode == TX_MODE_SELECT;
  int64_t rd[TX_SIZE_MAX_SB][2];
  int best_is_32, best_is_16, best_is_8;
  int skip_cost0, skip_cost1;
  int n, m;

  // Add the cost of signalling each size: one bit per size step, stopping
  // early for the largest allowed size.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    const int last_bit = n - (n == max_txfm_size);

    r[n][1] = r[n][0];
    for (m = 0; m <= last_bit; m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  skip_cost0 = vp9_cost_bit(skip_prob, 0);
  skip_cost1 = vp9_cost_bit(skip_prob, 1);

  // RD cost per size, without ([0]) and with ([1]) the signalling bits.
  // A skipped block only pays for the skip flag.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, skip_cost1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + skip_cost0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + skip_cost0, d[n]);
    }
  }

  // Which size wins when the size is chosen per block. Sizes above
  // max_txfm_size were never measured, so they are guarded out first.
  best_is_32 = max_txfm_size == TX_32X32 &&
               rd[TX_32X32][1] < rd[TX_16X16][1] &&
               rd[TX_32X32][1] < rd[TX_8X8][1] &&
               rd[TX_32X32][1] < rd[TX_4X4][1];
  best_is_16 = max_txfm_size >= TX_16X16 &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1];
  best_is_8 = rd[TX_8X8][1] < rd[TX_4X4][1];

  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 || (select && best_is_32))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (select && best_is_16))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
             (select && best_is_8)) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][select];
  *skip       = s[mbmi->txfm_size];

  // RD cost of each fixed transform mode, capped at the largest size that
  // was measured for this block.
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];

  if (best_is_32)
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (best_is_16)
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}

503
504
// Returns the sum of squared differences between the first block_size
// entries of coeff and dqcoeff, right-shifted by shift and saturated to
// INT_MAX.
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int64_t sse = 0;
  int idx;

  for (idx = 0; idx < block_size; ++idx) {
    const int delta = coeff[idx] - dqcoeff[idx];
    // |delta| <= 65535, so the square needs more than 31 bits.
    sse += (int64_t)delta * delta;
  }
  sse >>= shift;

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}

517
518
// Returns the shifted, saturated squared error of plane 0 for a block of
// size bsize. The coefficient count is 16 per 4x4 unit, i.e.
// 16 << (log2 width + log2 height).
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int log2_units = b_width_log2(bsize) + b_height_log2(bsize);
  const int num_coeffs = 16 << log2_units;

  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     num_coeffs, shift);
}
522

523
524
525
526
// Returns the squared error summed over planes 1 .. MAX_MB_PLANE - 1 for a
// block of size bsize. Each plane's coefficient count is reduced by its
// subsampling; the shift is applied once to the total, which is then
// saturated to INT_MAX.
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int log2_units = b_width_log2(bsize) + b_height_log2(bsize);
  int64_t total = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    const int ss = x->e_mbd.plane[plane].subsampling_x +
                   x->e_mbd.plane[plane].subsampling_y;
    const int num_coeffs = 16 << (log2_units - ss);

    total += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                         num_coeffs, 0);
  }
  total >>= shift;

  if (total > INT_MAX)
    return INT_MAX;
  return (int)total;
}

538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
// State shared between rdcost_plane() and its per-block callback
// rdcost_block(). Field order matters: rdcost_plane() fills this struct
// with a positional initializer.
struct rdcost_block_args {
  VP9_COMMON *cm;               // passed through to cost_coeffs()
  MACROBLOCK *x;                // macroblock whose coefficients are costed
  ENTROPY_CONTEXT t_above[16];  // working copy of the above contexts
  ENTROPY_CONTEXT t_left[16];   // working copy of the left contexts
  TX_SIZE tx_size;              // transform size used for every block
  int bw;                       // plane block width, 1 << bwl
  int bh;                       // plane block height, 1 << bhl
  int cost;                     // running total of cost_coeffs() results
};

// Per-transform-block callback: adds the coefficient cost of one block to
// the running total in the rdcost_block_args passed through arg.
// The block index is converted to raster x/y offsets, which select the
// above/left context entries handed to cost_coeffs(). ss_txfrm_size is
// unused.
static void rdcost_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                         int ss_txfrm_size, void *arg) {
  struct rdcost_block_args *const ctx = (struct rdcost_block_args *)arg;
  MACROBLOCK *const mb = ctx->x;
  MACROBLOCKD *const xd = &mb->e_mbd;
  int col, row;

  (void)ss_txfrm_size;

  txfrm_block_to_raster_xy(xd, bsize, plane, block, ctx->tx_size * 2, &col,
                           &row);

  ctx->cost += cost_coeffs(ctx->cm, mb, plane, block,
                           xd->plane[plane].plane_type, ctx->t_above + col,
                           ctx->t_left + row, ctx->tx_size,
                           ctx->bw * ctx->bh);
}

// Returns the total coefficient cost of one plane of a block of size bsize
// coded with transform size tx_size.
// The plane's above/left entropy contexts are copied into a scratch struct
// first, so the costing pass works on the copies.
static int rdcost_plane(VP9_COMMON * const cm, MACROBLOCK *x, int plane,
                        BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD * const xd = &x->e_mbd;
  const int log2_w = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int log2_h = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int width = 1 << log2_w;
  const int height = 1 << log2_h;
  struct rdcost_block_args scratch = {
    cm, x, { 0 }, { 0 }, tx_size, width, height, 0
  };

  vpx_memcpy(scratch.t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * width);
  vpx_memcpy(scratch.t_left, xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * height);

  foreach_transformed_block_in_plane(xd, bsize, plane, rdcost_block, &scratch);

  return scratch.cost;
}

582
583
584
// Returns the summed coefficient cost of planes 1 .. MAX_MB_PLANE - 1 for a
// block of size bsize coded with transform size tx_size.
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total;
}

592
593
594
// Measures the luma rate, distortion and skippability of a block of size
// bsize for one transform size.
// Sets mbmi.txfm_size to tx_size, encodes (intra) or transforms and
// quantizes (inter) plane 0, then reports the results through rate,
// distortion and skippable. The distortion is shifted right by 2 for every
// transform size except TX_32X32.
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  const int dist_shift = (tx_size == TX_32X32) ? 0 : 2;

  mbmi->txfm_size = tx_size;

  if (mbmi->ref_frame == INTRA_FRAME) {
    vp9_encode_intra_block_y(cm, x, bsize);
  } else {
    vp9_xform_quant_sby(cm, x, bsize);
  }

  *distortion = block_error_sby(x, bsize, dist_shift);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

608
609
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
610
                            int *skip, BLOCK_SIZE_TYPE bs,
611
                            int64_t txfm_cache[NB_TXFM_MODES]) {
612
613
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Jim Bankoski's avatar
Jim Bankoski committed
614
615
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
616

617
618
  if (mbmi->ref_frame > INTRA_FRAME)
    vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
619

Jim Bankoski's avatar
Jim Bankoski committed
620
621
622
623
624
625
626
627
628
629
  if (cpi->speed > 4) {
    if (bs >= BLOCK_SIZE_SB32X32) {
      mbmi->txfm_size = TX_32X32;
    } else if (bs >= BLOCK_SIZE_MB16X16) {
      mbmi->txfm_size = TX_16X16;
    } else if (bs >= BLOCK_SIZE_SB8X8) {
      mbmi->txfm_size = TX_8X8;
    } else {
      mbmi->txfm_size = TX_4X4;
    }
630
    vpx_memset(txfm_cache, 0, NB_TXFM_MODES * sizeof(int64_t));
Jim Bankoski's avatar
Jim Bankoski committed
631
632
633
634
    super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
                             mbmi->txfm_size);
    return;
  }
635
  if (bs >= BLOCK_SIZE_SB32X32)
636
637
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
638
  if (bs >= BLOCK_SIZE_MB16X16)
639
640
641
642
643
644
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
645
646

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
647
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
648
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
649
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
650

651
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
Yaowu Xu's avatar
Yaowu Xu committed
652
                                     MB_PREDICTION_MODE *best_mode,
653
654
655
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
656
657
658
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
Deb Mukherjee's avatar
Deb Mukherjee committed
659
  MACROBLOCKD *xd = &x->e_mbd;
660
  int64_t best_rd = INT64_MAX;
John Koleszar's avatar
John Koleszar committed
661
662
  int rate = 0;
  int distortion;
663
  VP9_COMMON *const cm = &cpi->common;
John Koleszar's avatar
John Koleszar committed
664
  const int src_stride = x->plane[0].src.stride;
665
666
667
668
669
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
Deb Mukherjee's avatar
Deb Mukherjee committed
670
671
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
672
673
674
675
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
John Koleszar's avatar
John Koleszar committed
676

Jingning Han's avatar
Jingning Han committed
677
  assert(ib < 4);
678

679
680
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
681
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
682
683

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
684
    int64_t this_rd;
685
    int ratey = 0;
686

687
    rate = bmode_costs[mode];
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
705
706
        vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
                             dst, xd->plane[0].dst.stride);
707
708
709
710
711
712
713
714
715
716
717
718
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
          x->quantize_b_4x4(x, block, tx_type, 16);
        } else {
          x->fwd_txm4x4(src_diff, coeff, 16);
          x->quantize_b_4x4(x, block, tx_type, 16);
        }
John Koleszar's avatar
John Koleszar committed
719

720
721
722
723
        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;
John Koleszar's avatar
John Koleszar committed
724

725
726
727
728
729
730
731
732
        if (best_tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, best_tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }
Jingning Han's avatar
Jingning Han committed
733

734
735
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
736

737
738
739
740
741
742
743
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
744
745
746
747
748
749
750
751
752
753
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
John Koleszar's avatar
John Koleszar committed
754
    }
John Koleszar's avatar
John Koleszar committed
755
  }
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774

  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
                           dst, xd->plane[0].dst.stride);
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
Scott LaVarnway's avatar
Scott LaVarnway committed
775
  }
John Koleszar's avatar
John Koleszar committed
776

John Koleszar's avatar
John Koleszar committed
777
  return best_rd;
John Koleszar's avatar
John Koleszar committed
778
779
}

780
781
/* Chooses intra prediction modes for the 4x4 luma blocks of the current
 * 8x8 block by calling rd_pick_intra4x4block() once per group of bw x bh
 * 4x4 blocks (bw and bh are derived from mbmi.sb_type).
 *
 *   cpi, mb     encoder and macroblock state
 *   Rate        out: total rate (mode + coefficients) of the chosen modes
 *   rate_y      out: total coefficient rate of the chosen modes
 *   Distortion  out: total distortion of the chosen modes
 *   best_rd     RD cost to beat
 *
 * Returns the RD cost of the chosen modes, or INT64_MAX as soon as the
 * accumulated RD cost reaches best_rd; in that case the output parameters
 * and mbmi.mode are left untouched.
 *
 * On success mbmi.mode is set to the mode of 4x4 block 3.
 */
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
                                         int *Distortion, int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
  int cost = 0;
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;
  MODE_INFO *const mic = xd->mode_info_context;

  /* Work on local copies of the entropy contexts; the per-group search
   * updates them as it goes. */
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  bmode_costs = mb->mbmode_cost;

  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      const int mis = xd->mode_info_stride;
      MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
      int UNINITIALIZED_IS_SAFE(d);
      i = idy * 2 + idx;

      /* On key frames the mode cost table is selected by the modes of the
       * above and left blocks. */
      if (xd->frame_type == KEY_FRAME) {
        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, i) : DC_PRED;

        bmode_costs  = mb->y_mode_costs[A][L];
      }

      total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                        t_above + idx, t_left + idy,
                                        &r, &ry, &d, bsize);
      cost += r;
      distortion += d;
      tot_rate_y += ry;

      /* Record the chosen mode for every 4x4 block the group covers. */
      mic->bmi[i].as_mode.first = best_mode;
      for (j = 1; j < bh; ++j)
        mic->bmi[i + j * 2].as_mode.first = best_mode;
      for (j = 1; j < bw; ++j)
        mic->bmi[i + j].as_mode.first = best_mode;

      /* Stop the whole search once the budget is exhausted. A plain break
       * would only leave the inner loop and keep searching the next row
       * even though the result can no longer beat best_rd. */
      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *Rate = cost;
  *rate_y = tot_rate_y;
  *Distortion = distortion;
  xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;

  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
846

847
848
849
850
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int *distortion, int *skippable,
                                      BLOCK_SIZE_TYPE bsize,
851
                                      int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
852
853
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
Jim Bankoski's avatar
Jim Bankoski committed
854
  MACROBLOCKD *const xd = &x->e_mbd;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
855
  int this_rate, this_rate_tokenonly;
856
  int this_distortion, s;
857
  int64_t best_rd = INT64_MAX, this_rd;
858
859
  TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
  int i;
860
  int *bmode_costs = x->mbmode_cost;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
861

862
863
864
865
866
  if (bsize < BLOCK_SIZE_SB8X8) {
    x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
    return best_rd;
  }

867
868
  for (i = 0; i < NB_TXFM_MODES; i++)
    txfm_cache[i] = INT64_MAX;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
869
870
871

  /* Y Search for 32x32 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
872
    int64_t local_txfm_cache[NB_TXFM_MODES];
873
874
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;
875

876
877
878
879
880
881
882
    if (cpi->common.frame_type == KEY_FRAME) {
      const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
      const MB_PREDICTION_MODE L = xd->left_available ?
                                   left_block_mode(mic, 0) : DC_PRED;

      bmode_costs = x->y_mode_costs[A][L];
    }
Ronald S. Bultje's avatar
Ronald S. Bultje committed
883
884
    x->e_mbd.mode_info_context->mbmi.mode = mode;

885
886
    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
                    bsize, local_txfm_cache);
Jim Bankoski's avatar
Jim Bankoski committed
887

888
    this_rate = this_rate_tokenonly + bmode_costs[mode];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
889
890
891
892
893
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
894
      best_tx         = x->e_mbd.mode_info_context->mbmi.txfm_size;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
895
896
897
898
899
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
    }
900

901
902
    for (i = 0; i < NB_TXFM_MODES; i++) {
      int64_t adj_rd = this_rd + local_txfm_cache[i] -
903
                       local_txfm_cache[cpi->common.txfm_mode];
904
905
      if (adj_rd < txfm_cache[i]) {
        txfm_cache[i] = adj_rd;