/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

John Koleszar's avatar
John Koleszar committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/* Table of 17 threshold values.
 * NOTE(review): presumably indexed by encoder speed setting; the consumer is
 * not visible in this part of the file -- confirm before changing values. */
static const int auto_speed_thresh[17] = {
  1000,
  200,
  150,
  130,
  150,
  125,
  120,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  115,
  105
};

69
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
70
71
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
72

73
74
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
75

76
77
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
78

79
80
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
81

82
83
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
84

85
86
87
88
89
90
91
92
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
93

94
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
95

96
97
98
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
99

100
101
102
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
103

Yaowu Xu's avatar
Yaowu Xu committed
104
  {I4X4_PRED,    INTRA_FRAME,  NONE},
105

John Koleszar's avatar
John Koleszar committed
106
107
108
109
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
110

John Koleszar's avatar
John Koleszar committed
111
112
113
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
114

John Koleszar's avatar
John Koleszar committed
115
116
117
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
118

John Koleszar's avatar
John Koleszar committed
119
120
121
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
122

John Koleszar's avatar
John Koleszar committed
123
124
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
125
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
126
127
};

128
129
static void fill_token_costs(vp9_coeff_count *c,
                             vp9_coeff_probs *p,
130
                             TX_SIZE tx_size) {
131
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
132

133
  for (i = 0; i < BLOCK_TYPES; i++)
134
135
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
136
137
        for (l = 0; l < PREV_COEF_CONTEXTS; l++)
          vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l],
138
139
140
                               vp9_coef_tree);
}

141
142
143
144
/* RD multiplier adjustment factors, 32 entries; only the first five are
 * non-zero.  vp9_initialize_rd_consts() looks one up by
 * twopass.next_iiratio (capped at index 31) and adds
 * (RDMULT * factor) >> 4 to RDMULT. */
static int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
John Koleszar's avatar
John Koleszar committed
145

146
// 3* dc_qlookup[Q]*dc_qlookup[Q];
147

148
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
149
150
151
/* Per-qindex SAD-per-bit values, filled in by vp9_init_me_luts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/* Populates both SAD-per-bit lookup tables for every qindex in
 * [0, QINDEX_RANGE). */
void vp9_init_me_luts(void) {
  int i;

  // Initialize the sad lut tables using a formulaic calculation for now
  // This is to make it easier to resolve the impact of experimental changes
  // to the quantizer tables.
  for (i = 0; i < QINDEX_RANGE; i++) {
    // Both tables are linear in the same converted q value; look it up once.
    const double q = vp9_convert_qindex_to_q(i);

    sad_per_bit16lut[i] = (int)((0.0418 * q) + 2.4107);
    sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
164

165
/* Returns the base RD multiplier for `qindex`: (11 * q * q) >> 2, where q is
 * vp9_dc_quant(qindex, 0). */
static int compute_rd_mult(int qindex) {
  const int q = vp9_dc_quant(qindex, 0);
  return (11 * q * q) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
170
171
172
/* Copies the SAD-per-bit values for `qindex` from the lookup tables into
 * cpi->mb.  `qindex` indexes the tables directly and is not range-checked
 * here, so it must lie in [0, QINDEX_RANGE). */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
  cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
}

175

Dmitry Kovalev's avatar
Dmitry Kovalev committed
176
/* Sets up the rate-distortion constants and cost tables on `cpi` for the
 * given `qindex` (clamped to [0, MAXQ]):
 *   - RDMULT / RDDIV and mb.errorperbit,
 *   - per-mode rd_threshes / rd_baseline_thresh,
 *   - token costs for every transform size, partition costs, mode costs,
 *   - motion vector costs (non-key frames only).
 * Also calls vp9_set_speed_features(cpi). */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Never let errorperbit be zero.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Multiply in 64 bits: thresh_mult * q may not fit in an int even
        // though the final value (after / 100) does.  Saturate if not.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = scaled > INT_MAX ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

258
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`.
 *
 * The sum is accumulated in 64 bits and saturated at INT_MAX, so large
 * differences or large blocks cannot overflow a signed int.  Results that
 * fit in an int are returned unchanged. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = (int64_t)coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

269
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
270
                              int plane, int block, PLANE_TYPE type,
271
272
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
273
274
                              TX_SIZE tx_size,
                              int y_blocks) {
275
  MACROBLOCKD *const xd = &mb->e_mbd;
276
277
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
278
  int c = 0;
279
280
  int cost = 0, pad;
  const int *scan, *nb;
281
282
283
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
284
  const int ref = mbmi->ref_frame != INTRA_FRAME;
285
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
286
      mb->token_costs[tx_size][type][ref];
287
  ENTROPY_CONTEXT above_ec, left_ec;
288
  TX_TYPE tx_type = DCT_DCT;
289

290
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
291
292
293
294
  vp9_prob (*coef_probs)[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                        [ENTROPY_NODES];
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
295
296

  // Check for consistency of tx_size with mode info
297
  assert((!type && !plane) || (type && plane));
298
299
300
301
302
303
304
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

305
  switch (tx_size) {
306
    case TX_4X4: {
307
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
308
          get_tx_type_4x4(xd, block) : DCT_DCT;
309
310
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
311
      coef_probs = cm->fc.coef_probs_4x4;
312
      seg_eob = 16;
313
      scan = get_scan_4x4(tx_type);
Daniel Kang's avatar
Daniel Kang committed
314
      break;
315
    }
316
317
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
318
      const int sz = 1 + b_width_log2(sb_type);
319
      const int x = block & ((1 << sz) - 1), y = block - x;
320
321
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
322
323
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
324
      scan = get_scan_8x8(tx_type);
325
      coef_probs = cm->fc.coef_probs_8x8;
326
      seg_eob = 64;
Daniel Kang's avatar
Daniel Kang committed
327
      break;
328
329
330
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
331
      const int sz = 2 + b_width_log2(sb_type);
332
      const int x = block & ((1 << sz) - 1), y = block - x;
333
334
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
335
      scan = get_scan_16x16(tx_type);
336
      coef_probs = cm->fc.coef_probs_16x16;
337
      seg_eob = 256;
338
339
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Daniel Kang's avatar
Daniel Kang committed
340
      break;
341
    }
342
343
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
344
      coef_probs = cm->fc.coef_probs_32x32;
345
      seg_eob = 1024;
346
347
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
348
      break;
Daniel Kang's avatar
Daniel Kang committed
349
    default:
350
      abort();
Daniel Kang's avatar
Daniel Kang committed
351
352
      break;
  }
John Koleszar's avatar
John Koleszar committed
353
  assert(eob <= seg_eob);
354

355
  pt = combine_entropy_contexts(above_ec, left_ec);
356
357
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
358

359
360
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
361

362
363
364
365
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

366
  {
367
    for (c = 0; c < eob; c++) {
368
      int v = qcoeff_ptr[scan[c]];
369
      int t = vp9_dct_value_tokens_ptr[v].token;
370
371
372
      int band = get_coef_band(scan, tx_size, c);
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
373

374
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
375

376
377
      if (!c || token_cache[scan[c - 1]])
        cost += vp9_cost_bit(coef_probs[type][ref][band][pt][0], 1);
378
      token_cache[scan[c]] = t;
379
    }
380
381
382
383
384
385
386
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
          [get_coef_band(scan, tx_size, c)]
          [pt][DCT_EOB_TOKEN];
    }
387
388
  }

389
390
391
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
392
  }
393

394
395
396
  return cost;
}

397
/* Chooses the transform size for the current block from per-size RD data
 * and stores it in mbmi->txfm_size.
 *
 * Inputs, each indexed by transform size n in [TX_4X4, max_txfm_size]:
 *   r[n][0]  rate without transform-size signalling (read),
 *   r[n][1]  written here: r[n][0] plus the cost of signalling size n,
 *   d[n]     distortion,
 *   s[n]     skip flag.
 * Outputs: *rate, *distortion and *skip for the chosen size, and
 * txfm_cache[mode] with the RD cost obtained under each transform mode.
 *
 * The code reads rd[TX_8X8] unconditionally, so max_txfm_size must be at
 * least TX_8X8. */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int n, m;
  int s0, s1;

  // r[n][1]: add the cost of coding size n as a run of "one" bits closed by
  // a "zero" bit; the largest allowed size needs no closing bit.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    r[n][1] = r[n][0];
    for (m = 0; m <= n - (n == max_txfm_size); m++) {
      if (m == n)
        r[n][1] += vp9_cost_zero(cm->prob_tx[m]);
      else
        r[n][1] += vp9_cost_one(cm->prob_tx[m]);
    }
  }

  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  // rd[n][0] / rd[n][1]: RD cost without / with size signalling.  A
  // skippable block pays only for the skip flag.
  for (n = TX_4X4; n <= max_txfm_size; n++) {
    if (s[n]) {
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }
  }

  // Fixed modes take the largest size they allow (bounded by
  // max_txfm_size); TX_MODE_SELECT takes a larger size only when it is
  // strictly cheaper than every smaller one.
  if (max_txfm_size == TX_32X32 &&
      (cm->txfm_mode == ALLOW_32X32 ||
       (cm->txfm_mode == TX_MODE_SELECT &&
        rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
        rd[TX_32X32][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             (cm->txfm_mode == ALLOW_16X16 ||
              cm->txfm_mode == ALLOW_32X32 ||
              (cm->txfm_mode == TX_MODE_SELECT &&
               rd[TX_16X16][1] < rd[TX_8X8][1] &&
               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
    mbmi->txfm_size = TX_16X16;
  } else if (cm->txfm_mode == ALLOW_8X8 ||
             cm->txfm_mode == ALLOW_16X16 ||
             cm->txfm_mode == ALLOW_32X32 ||
           (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
    mbmi->txfm_size = TX_8X8;
  } else {
    mbmi->txfm_size = TX_4X4;
  }

  *distortion = d[mbmi->txfm_size];
  *rate       = r[mbmi->txfm_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->txfm_size];

  // RD cost under each transform mode, whichever mode is active.
  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  else if (max_txfm_size >= TX_16X16 &&
           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
    txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  else
    txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
                                 rd[TX_4X4][1] : rd[TX_8X8][1];
}

476
477
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`, right-shifted by `shift` and saturated
 * at INT_MAX.  The sum is accumulated in 64 bits. */
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int64_t error = 0;
  int i;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }
  error >>= shift;

  return error > INT_MAX ? INT_MAX : (int)error;
}

490
491
/* Returns block_error() between plane 0's coefficients and dequantized
 * coefficients, over 16 << (bwl + bhl) values for block size `bsize`, with
 * the given `shift`. */
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     16 << (bwl + bhl), shift);
}
495

496
497
498
499
/* Returns the combined block_error() of planes 1..MAX_MB_PLANE-1 for block
 * size `bsize`.  Each plane's coefficient count is reduced by its x and y
 * subsampling.  The unshifted per-plane errors are summed in 64 bits, then
 * the total is right-shifted by `shift` and saturated at INT_MAX. */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  int64_t sum = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    const int subsampling = x->e_mbd.plane[plane].subsampling_x +
                            x->e_mbd.plane[plane].subsampling_y;
    sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff,
                       16 << (bwl + bhl - subsampling), 0);
  }
  sum >>= shift;
  return sum > INT_MAX ? INT_MAX : (int)sum;
}

511
512
/* Returns the total coefficient cost of one plane of a block of size
 * `bsize` coded with transform size `tx_size`.
 *
 * The plane's above/left entropy contexts are copied into local arrays
 * first, so the updates cost_coeffs() makes while walking the transform
 * blocks do not touch the contexts stored in xd. */
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl, bh = 1 << bhl;
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int block, cost;

  vpx_memcpy(t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(t_left,  xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  cost = 0;
  // `block` advances by the number of 4x4 units per transform block.
  for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) {
    int x_idx, y_idx;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);

    cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
                        t_above + x_idx, t_left + y_idx,
                        tx_size, bw * bh);
  }

  return cost;
}

540
541
542
/* Returns the sum of rdcost_plane() over planes 1..MAX_MB_PLANE-1. */
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int cost = 0, plane;

  for (plane = 1; plane < MAX_MB_PLANE; plane++) {
    cost += rdcost_plane(cm, x, plane, bsize, tx_size);
  }
  return cost;
}

550
551
552
/* Computes rate, distortion and skippability of plane 0 for block size
 * `bsize` coded with transform size `tx_size`.
 *
 * Side effects: sets mbmi.txfm_size to `tx_size` and runs
 * vp9_xform_quant_sby() on the block. */
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  // The 32x32 transform error is used unshifted; other sizes shift by 2.
  *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

562
563
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
564
                            int *skip, BLOCK_SIZE_TYPE bs,
565
                            int64_t txfm_cache[NB_TXFM_MODES]) {
566
567
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
568

569
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
570

571
  if (bs >= BLOCK_SIZE_SB32X32)
572
573
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
574
  if (bs >= BLOCK_SIZE_MB16X16)
575
576
577
578
579
580
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
581
582

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
583
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
584
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
585
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
586

587
588
/* Searches the 4x4 intra prediction modes for sub-block `ib` (0..3) and
 * returns the best RD cost found.
 *
 * bmode_costs[mode] supplies the mode signalling cost.  *a and *l are the
 * above / left entropy contexts for the sub-block: every candidate is
 * costed from their entry values, and on return they hold the contexts
 * produced by the winning mode.
 *
 * Outputs: *best_mode, *bestrate (mode + coefficient rate), *bestratey
 * (coefficient rate only) and *bestdistortion.  The winning mode is also
 * stored in bmi[ib].as_mode.first and the sub-block is reconstructed into
 * the destination buffer with it.  mbmi.txfm_size is set to TX_4X4. */
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion) {
  B_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = INT64_MAX;
  int rate = 0;
  int distortion;
  VP9_COMMON *const cm = &cpi->common;
  const int src_stride = x->plane[0].src.stride;
  uint8_t* const src =
      raster_block_offset_uint8(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
                                x->plane[0].src.buf, src_stride);
  int16_t* const src_diff =
      raster_block_offset_int16(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
                                x->plane[0].src_diff);
  int16_t* const diff =
      raster_block_offset_int16(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
                                xd->plane[0].diff);
  int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16);
  uint8_t* const dst =
      raster_block_offset_uint8(xd,
                                BLOCK_SIZE_SB8X8,
                                0, ib,
                                xd->plane[0].dst.buf, xd->plane[0].dst.stride);
  ENTROPY_CONTEXT ta = *a, tempa = *a;
  ENTROPY_CONTEXT tl = *l, templ = *l;
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
  /*
   * Holds the dequantized coefficients of the best mode found so far, so
   * the winner can be inverse-transformed once the search is finished.
   */
  DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);

  assert(ib < 4);

  xd->mode_info_context->mbmi.txfm_size = TX_4X4;
  for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
    int64_t this_rd;
    int ratey;

    xd->mode_info_context->bmi[ib].as_mode.first = mode;
    rate = bmode_costs[mode];

    vp9_intra4x4_predict(xd, ib,
                         BLOCK_SIZE_SB8X8,
                         mode, dst, xd->plane[0].dst.stride);
    vp9_subtract_block(4, 4, src_diff, 8,
                       src, src_stride,
                       dst, xd->plane[0].dst.stride);

    // NOTE(review): repeats the store at the top of the loop; kept because
    // the callees in between are not visible here.
    xd->mode_info_context->bmi[ib].as_mode.first = mode;
    tx_type = get_tx_type_4x4(xd, ib);
    if (tx_type != DCT_DCT) {
      vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
      x->quantize_b_4x4(x, ib, tx_type, 16);
    } else {
      x->fwd_txm4x4(src_diff, coeff, 16);
      x->quantize_b_4x4(x, ib, tx_type, 16);
    }

    // Cost every candidate from the same entry contexts.
    tempa = ta;
    templ = tl;

    ratey = cost_coeffs(cm, x, 0, ib,
                        PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16);
    rate += ratey;
    distortion = vp9_block_error(coeff,
                                 BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
                                 16) >> 2;

    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
      *a = tempa;
      *l = templ;
      // One 4x4 block of dequantized coefficients (16 values).
      vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
                 16 * sizeof(int16_t));
    }
  }
  xd->mode_info_context->bmi[ib].as_mode.first =
    (B_PREDICTION_MODE)(*best_mode);

  // inverse transform
  if (best_tx_type != DCT_DCT)
    vp9_short_iht4x4(best_dqcoeff, diff, 8, best_tx_type);
  else
    xd->inv_txm4x4(best_dqcoeff, diff, 16);

  // Redo the winning prediction (dst was overwritten by later candidates)
  // and add the reconstructed residual to it.
  vp9_intra4x4_predict(xd, ib,
                       BLOCK_SIZE_SB8X8,
                       *best_mode,
                       dst, xd->plane[0].dst.stride);
  vp9_recon_b(dst, diff, 8,
              dst, xd->plane[0].dst.stride);

  return best_rd;
}

702
703
/* Picks a 4x4 intra prediction mode for each of the four sub-blocks
 * (indices 0..3) and accumulates their rate, luma-only rate and distortion.
 *
 * The search is abandoned as soon as the running sum of per-block RD costs
 * reaches or exceeds best_rd; in that case INT64_MAX is returned and *Rate,
 * *rate_y and *Distortion are left untouched.  Otherwise the three outputs
 * are written and the RD cost of the accumulated totals is returned.
 *
 * Side effects: sets mbmi.mode to I4X4_PRED and stores the chosen mode of
 * every visited sub-block in bmi[].as_mode.first.  The entropy contexts of
 * plane 0 are only read here (copied into local scratch arrays).
 */
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
                                         int *Distortion, int64_t best_rd) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  ENTROPY_CONTEXT above_ctx[2], left_ctx[2];
  int *mode_costs = mb->inter_bmode_costs;
  int rate_sum = mb->mbmode_cost[xd->frame_type][I4X4_PRED];
  int rate_y_sum = 0;
  int dist_sum = 0;
  int64_t rd_sum = 0;
  int block;

  // Work on private copies of the above/left contexts.
  vpx_memcpy(above_ctx, xd->plane[0].above_context, sizeof(above_ctx));
  vpx_memcpy(left_ctx, xd->plane[0].left_context, sizeof(left_ctx));

  xd->mode_info_context->mbmi.mode = I4X4_PRED;

  for (block = 0; block < 4; ++block) {
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;
    const int col = block & 1;
    const int row = block >> 1;
    B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(picked_mode);
    int UNINITIALIZED_IS_SAFE(blk_rate);
    int UNINITIALIZED_IS_SAFE(blk_rate_y);
    int UNINITIALIZED_IS_SAFE(blk_dist);

    // On key frames the mode cost table is selected by the above and left
    // block modes; otherwise the table chosen before the loop stays in use.
    if (xd->frame_type == KEY_FRAME) {
      const B_PREDICTION_MODE A = above_block_mode(mic, block, mis);
      const B_PREDICTION_MODE L = left_block_mode(mic, block);

      mode_costs = mb->bmode_costs[A][L];
    }

    rd_sum += rd_pick_intra4x4block(cpi, mb, block, &picked_mode, mode_costs,
                                    &above_ctx[col], &left_ctx[row],
                                    &blk_rate, &blk_rate_y, &blk_dist);

    rate_sum += blk_rate;
    dist_sum += blk_dist;
    rate_y_sum += blk_rate_y;

    mic->bmi[block].as_mode.first = picked_mode;

    // Already no better than the caller's best: give up without touching
    // the output parameters.
    if (rd_sum >= best_rd)
      return INT64_MAX;
  }

  *Rate = rate_sum;
  *rate_y = rate_y_sum;
  *Distortion = dist_sum;

  return RDCOST(mb->rdmult, mb->rddiv, rate_sum, dist_sum);
}
757

758
759
760
761
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int *distortion, int *skippable,
                                      BLOCK_SIZE_TYPE bsize,
762
                                      int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
763
764
765
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
  int this_rate, this_rate_tokenonly;
766
  int this_distortion, s;
767
  int64_t best_rd = INT64_MAX, this_rd;
768
769
  TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
  int i;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
770

771
772
  for (i = 0; i < NB_TXFM_MODES; i++)
    txfm_cache[i] = INT64_MAX;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
773
774
775

  /* Y Search for 32x32 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
776
777
    int64_t local_txfm_cache[NB_TXFM_MODES];

Ronald S. Bultje's avatar
Ronald S. Bultje committed
778
    x->e_mbd.mode_info_context->mbmi.mode = mode;
779
    vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
780

781
782
783
    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
                    bsize, local_txfm_cache);
    this_rate = this_rate_tokenonly + x->mbmode_cost[x->e_mbd.frame_type][mode];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
784
785
786
787
788
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
789
      best_tx         = x->e_mbd.mode_info_context->mbmi.txfm_size;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
790
791
792
793
794
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
    }
795

796
797
    for (i = 0; i < NB_TXFM_MODES; i++) {
      int64_t adj_rd = this_rd + local_txfm_cache[i] -
798
                       local_txfm_cache[cpi->common.txfm_mode];
799
800
      if (adj_rd < txfm_cache[i]) {
        txfm_cache[i] = adj_rd;
John Koleszar's avatar
John Koleszar committed
801
      }
John Koleszar's avatar
John Koleszar committed
802
    }
John Koleszar's avatar
John Koleszar committed
803
  }
John Koleszar's avatar
John Koleszar committed
804

805
806
  x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
  x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx;
807

John Koleszar's avatar
John Koleszar committed
808
  return best_rd;
John Koleszar's avatar
John Koleszar committed
809
}
Jingning Han's avatar
Jingning Han committed
810

811
812
813
814
/* Transforms and quantizes the chroma planes of a block of size bsize, then
 * reports the resulting distortion, rate and skippable flag for the given
 * chroma transform size through the three output pointers.
 */
static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                      int *rate, int *distortion,
                                      int *skippable, BLOCK_SIZE_TYPE bsize,
                                      TX_SIZE uv_tx_size) {
  // 0 for 32x32 transforms, 2 for every other size; presumably a right-shift
  // applied to the raw block error -- confirm against block_error_sbuv.
  int uv_shift;

  if (uv_tx_size == TX_32X32)
    uv_shift = 0;
  else
    uv_shift = 2;

  vp9_xform_quant_sbuv(cm, x, bsize);

  *distortion = block_error_sbuv(x, bsize, uv_shift);
  *rate = rdcost_uv(cm, x, bsize, uv_tx_size);
  *skippable = vp9_sbuv_is_skippable(&x->e_mbd, bsize);
}

static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
                             int *rate, int *distortion, int *skippable,
                             BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;

  vp9_subtract_sbuv(x, bsize);

  if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) {
832
833
    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
                              TX_32X32);
834
  } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) {
835
836
    super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
                              TX_16X16);<