vp9_rdopt.c 123 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <assert.h>
16

17
#include "vp9/common/vp9_pragmas.h"
18
19
20
21
22
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_modecosts.h"
#include "vp9/encoder/vp9_encodeintra.h"
23
24
25
26
27
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_findnearmv.h"
#include "vp9/common/vp9_quant_common.h"
28
29
30
31
32
33
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_ratectrl.h"
John Koleszar's avatar
John Koleszar committed
34
#include "vpx_mem/vpx_mem.h"
35
36
37
38
39
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
40
#include "vp9_rtcd.h"
41
#include "vp9/common/vp9_mvref_common.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
42
#include "vp9/common/vp9_common.h"
Paul Wilkins's avatar
Paul Wilkins committed
43

44
45
#define INVALID_MV 0x80008000

46
47
48
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1

49
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
50
51
  {ZEROMV,    LAST_FRAME,   NONE},
  {DC_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
52

53
54
  {NEARESTMV, LAST_FRAME,   NONE},
  {NEARMV,    LAST_FRAME,   NONE},
John Koleszar's avatar
John Koleszar committed
55

56
57
  {ZEROMV,    GOLDEN_FRAME, NONE},
  {NEARESTMV, GOLDEN_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
58

59
60
  {ZEROMV,    ALTREF_FRAME, NONE},
  {NEARESTMV, ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
61

62
63
  {NEARMV,    GOLDEN_FRAME, NONE},
  {NEARMV,    ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
64

65
66
67
68
69
70
71
72
  {V_PRED,    INTRA_FRAME,  NONE},
  {H_PRED,    INTRA_FRAME,  NONE},
  {D45_PRED,  INTRA_FRAME,  NONE},
  {D135_PRED, INTRA_FRAME,  NONE},
  {D117_PRED, INTRA_FRAME,  NONE},
  {D153_PRED, INTRA_FRAME,  NONE},
  {D27_PRED,  INTRA_FRAME,  NONE},
  {D63_PRED,  INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
73

74
  {TM_PRED,   INTRA_FRAME,  NONE},
John Koleszar's avatar
John Koleszar committed
75

76
77
78
  {NEWMV,     LAST_FRAME,   NONE},
  {NEWMV,     GOLDEN_FRAME, NONE},
  {NEWMV,     ALTREF_FRAME, NONE},
John Koleszar's avatar
John Koleszar committed
79

80
81
82
  {SPLITMV,   LAST_FRAME,   NONE},
  {SPLITMV,   GOLDEN_FRAME, NONE},
  {SPLITMV,   ALTREF_FRAME, NONE},
83

Yaowu Xu's avatar
Yaowu Xu committed
84
  {I4X4_PRED,    INTRA_FRAME,  NONE},
85

John Koleszar's avatar
John Koleszar committed
86
87
88
89
  /* compound prediction modes */
  {ZEROMV,    LAST_FRAME,   GOLDEN_FRAME},
  {NEARESTMV, LAST_FRAME,   GOLDEN_FRAME},
  {NEARMV,    LAST_FRAME,   GOLDEN_FRAME},
90

John Koleszar's avatar
John Koleszar committed
91
92
93
  {ZEROMV,    ALTREF_FRAME, LAST_FRAME},
  {NEARESTMV, ALTREF_FRAME, LAST_FRAME},
  {NEARMV,    ALTREF_FRAME, LAST_FRAME},
94

John Koleszar's avatar
John Koleszar committed
95
96
97
  {ZEROMV,    GOLDEN_FRAME, ALTREF_FRAME},
  {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
  {NEARMV,    GOLDEN_FRAME, ALTREF_FRAME},
98

John Koleszar's avatar
John Koleszar committed
99
100
101
  {NEWMV,     LAST_FRAME,   GOLDEN_FRAME},
  {NEWMV,     ALTREF_FRAME, LAST_FRAME  },
  {NEWMV,     GOLDEN_FRAME, ALTREF_FRAME},
102

John Koleszar's avatar
John Koleszar committed
103
104
  {SPLITMV,   LAST_FRAME,   GOLDEN_FRAME},
  {SPLITMV,   ALTREF_FRAME, LAST_FRAME  },
105
  {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
John Koleszar's avatar
John Koleszar committed
106
107
};

108
static void fill_token_costs(vp9_coeff_count *c,
109
                             vp9_coeff_probs_model *p,
110
                             TX_SIZE tx_size) {
111
  int i, j, k, l;
John Koleszar's avatar
John Koleszar committed
112

113
  for (i = 0; i < BLOCK_TYPES; i++)
114
115
    for (j = 0; j < REF_TYPES; j++)
      for (k = 0; k < COEF_BANDS; k++)
116
117
        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
          vp9_prob probs[ENTROPY_NODES];
118
          vp9_model_to_full_probs(p[i][j][k][l], probs);
119
120
121
          vp9_cost_tokens_skip((int *)c[i][j][k][l], probs,
                               vp9_coef_tree);
        }
122
123
}

124
125
126
127
/* Per-ratio adjustment factors, indexed by twopass.next_iiratio (capped at
 * 31 by the reader).  The selected factor is applied to RDMULT as
 * RDMULT += (RDMULT * factor) >> 4.  The table is only ever read, so it is
 * const-qualified. */
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
John Koleszar's avatar
John Koleszar committed
128

129
// 3* dc_qlookup[Q]*dc_qlookup[Q];
130

131
/* values are now correlated to quantizer */
Paul Wilkins's avatar
Paul Wilkins committed
132
133
134
/* SAD-per-bit lookup tables, one entry per quantizer index.  Filled by
 * vp9_init_me_luts() and read by vp9_initialize_me_consts(). */
static int sad_per_bit16lut[QINDEX_RANGE];
static int sad_per_bit4lut[QINDEX_RANGE];

/* Populates both SAD-per-bit tables for every quantizer index.
 *
 * The values come from a linear formula in the real-valued quantizer rather
 * than from hand-tuned tables, which makes it easier to resolve the impact
 * of experimental changes to the quantizer tables. */
void vp9_init_me_luts() {
  int qindex;

  for (qindex = 0; qindex < QINDEX_RANGE; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex);

    sad_per_bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
    sad_per_bit4lut[qindex] = (int)(0.063 * q + 2.742);
  }
}
John Koleszar's avatar
John Koleszar committed
147

148
/* Returns the base rate-distortion multiplier for `qindex`:
 * 11/4 times the square of vp9_dc_quant(qindex, 0), rounded down. */
static int compute_rd_mult(int qindex) {
  const int dc_q = vp9_dc_quant(qindex, 0);
  const int dc_q_squared = dc_q * dc_q;

  return (11 * dc_q_squared) >> 2;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
153
154
155
/* Copies the SAD-per-bit values for `qindex` from the lookup tables into
 * the encoder's macroblock state.  `qindex` is used as an index directly,
 * without any range check. */
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
  MACROBLOCK *const mb = &cpi->mb;

  mb->sadperbit16 = sad_per_bit16lut[qindex];
  mb->sadperbit4 = sad_per_bit4lut[qindex];
}

158

Dmitry Kovalev's avatar
Dmitry Kovalev committed
159
/* Initializes the rate-distortion constants of `cpi` for quantizer index
 * `qindex` (clamped to [0, MAXQ]).
 *
 * Sets RDMULT / RDDIV, mb.errorperbit and the per-mode RD thresholds
 * (rd_threshes and rd_baseline_thresh), then rebuilds the token, partition
 * and mode cost tables and, for non-key frames, the motion vector cost
 * table. */
void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
  int q, i;

  vp9_clear_system_state();  // __asm emms;

  // Further tests required to see if optimum is different
  // for key frames, golden frames and arf frames.
  // if (cpi->common.refresh_golden_frame ||
  //     cpi->common.refresh_alt_ref_frame)
  qindex = clamp(qindex, 0, MAXQ);

  cpi->RDMULT = compute_rd_mult(qindex);
  if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    // rd_iifactor has 32 entries; ratios above 31 use the last one.
    if (cpi->twopass.next_iiratio > 31)
      cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
    else
      cpi->RDMULT +=
          (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
  }
  cpi->mb.errorperbit = cpi->RDMULT >> 6;
  // Keep errorperbit at least 1.
  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);

  vp9_set_speed_features(cpi);

  q = (int)pow(vp9_dc_quant(qindex, 0) >> 2, 1.25);
  q <<= 2;
  if (q < 8)
    q = 8;

  if (cpi->RDMULT > 1000) {
    cpi->RDDIV = 1;
    cpi->RDMULT /= 100;

    for (i = 0; i < MAX_MODES; i++) {
      if (cpi->sf.thresh_mult[i] < INT_MAX) {
        // Form the product in 64 bits and saturate: the int product
        // thresh_mult * q could otherwise overflow before the division.
        const int64_t scaled = (int64_t)cpi->sf.thresh_mult[i] * q / 100;
        cpi->rd_threshes[i] = scaled > INT_MAX ? INT_MAX : (int)scaled;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  } else {
    cpi->RDDIV = 100;

    for (i = 0; i < MAX_MODES; i++) {
      // Saturate instead of overflowing when thresh_mult * q is too large.
      if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) {
        cpi->rd_threshes[i] = cpi->sf.thresh_mult[i] * q;
      } else {
        cpi->rd_threshes[i] = INT_MAX;
      }
      cpi->rd_baseline_thresh[i] = cpi->rd_threshes[i];
    }
  }

  fill_token_costs(cpi->mb.token_costs[TX_4X4],
                   cpi->common.fc.coef_probs_4x4, TX_4X4);
  fill_token_costs(cpi->mb.token_costs[TX_8X8],
                   cpi->common.fc.coef_probs_8x8, TX_8X8);
  fill_token_costs(cpi->mb.token_costs[TX_16X16],
                   cpi->common.fc.coef_probs_16x16, TX_16X16);
  fill_token_costs(cpi->mb.token_costs[TX_32X32],
                   cpi->common.fc.coef_probs_32x32, TX_32X32);

  for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
    vp9_cost_tokens(cpi->mb.partition_cost[i],
                    cpi->common.fc.partition_prob[i],
                    vp9_partition_tree);

  /*rough estimate for costing*/
  cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
  vp9_init_mode_costs(cpi);

  if (cpi->common.frame_type != KEY_FRAME) {
    vp9_build_nmv_cost_table(
        cpi->mb.nmvjointcost,
        cpi->mb.e_mbd.allow_high_precision_mv ?
        cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
        &cpi->common.fc.nmvc,
        cpi->mb.e_mbd.allow_high_precision_mv, 1, 1);
  }
}

241
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`.
 *
 * The sum is accumulated in 64 bits and saturated at INT_MAX, so that large
 * differences cannot trigger signed integer overflow (a single squared
 * difference of two int16_t values may already exceed INT_MAX).  A
 * non-positive `block_size` yields 0. */
int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t this_diff = coeff[i] - dqcoeff[i];
    error += this_diff * this_diff;
  }

  return error > INT_MAX ? INT_MAX : (int)error;
}

252
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
253
                              int plane, int block, PLANE_TYPE type,
254
255
                              ENTROPY_CONTEXT *A,
                              ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
256
257
                              TX_SIZE tx_size,
                              int y_blocks) {
258
  MACROBLOCKD *const xd = &mb->e_mbd;
259
260
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  int pt;
261
  int c = 0;
262
263
  int cost = 0, pad;
  const int *scan, *nb;
264
265
266
  const int eob = xd->plane[plane].eobs[block];
  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
                                           block, 16);
267
  const int ref = mbmi->ref_frame != INTRA_FRAME;
268
  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
269
      mb->token_costs[tx_size][type][ref];
270
  ENTROPY_CONTEXT above_ec, left_ec;
271
  TX_TYPE tx_type = DCT_DCT;
272

273
  const int segment_id = xd->mode_info_context->mbmi.segment_id;
274
  vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
275
276
  int seg_eob, default_eob;
  uint8_t token_cache[1024];
Paul Wilkins's avatar
Paul Wilkins committed
277
  const uint8_t * band_translate;
278
279

  // Check for consistency of tx_size with mode info
280
  assert((!type && !plane) || (type && plane));
281
282
283
284
285
286
287
  if (type == PLANE_TYPE_Y_WITH_DC) {
    assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
  } else {
    TX_SIZE tx_size_uv = get_uv_tx_size(xd);
    assert(tx_size == tx_size_uv);
  }

288
  switch (tx_size) {
289
    case TX_4X4: {
290
      tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
291
          get_tx_type_4x4(xd, block) : DCT_DCT;
292
293
      above_ec = A[0] != 0;
      left_ec = L[0] != 0;
294
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
295
                                 coef_probs);
296
      seg_eob = 16;
297
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
298
      band_translate = vp9_coefband_trans_4x4;
Daniel Kang's avatar
Daniel Kang committed
299
      break;
300
    }
301
302
    case TX_8X8: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
303
      const int sz = 1 + b_width_log2(sb_type);
304
      const int x = block & ((1 << sz) - 1), y = block - x;
305
306
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
307
308
      above_ec = (A[0] + A[1]) != 0;
      left_ec = (L[0] + L[1]) != 0;
309
      scan = get_scan_8x8(tx_type);
310
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
311
                                 coef_probs);
312
      seg_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
313
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
314
      break;
315
316
317
    }
    case TX_16X16: {
      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
318
      const int sz = 2 + b_width_log2(sb_type);
319
      const int x = block & ((1 << sz) - 1), y = block - x;
320
321
      TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
          get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
322
      scan = get_scan_16x16(tx_type);
323
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
324
                                 coef_probs);
325
      seg_eob = 256;
326
327
      above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
328
      band_translate = vp9_coefband_trans_8x8plus;
Daniel Kang's avatar
Daniel Kang committed
329
      break;
330
    }
331
332
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
333
      vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
334
                                 coef_probs);
335
      seg_eob = 1024;
336
337
      above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
      left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
Paul Wilkins's avatar
Paul Wilkins committed
338
      band_translate = vp9_coefband_trans_8x8plus;
339
      break;
Daniel Kang's avatar
Daniel Kang committed
340
    default:
341
      abort();
Daniel Kang's avatar
Daniel Kang committed
342
343
      break;
  }
John Koleszar's avatar
John Koleszar committed
344
  assert(eob <= seg_eob);
345

346
  pt = combine_entropy_contexts(above_ec, left_ec);
347
348
  nb = vp9_get_coef_neighbors_handle(scan, &pad);
  default_eob = seg_eob;
349

350
351
  if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))
    seg_eob = 0;
352

353
354
355
356
  /* sanity check to ensure that we do not have spurious non-zero q values */
  if (eob < seg_eob)
    assert(qcoeff_ptr[scan[eob]] == 0);

357
  {
358
    for (c = 0; c < eob; c++) {
359
      int v = qcoeff_ptr[scan[c]];
360
      int t = vp9_dct_value_tokens_ptr[v].token;
Paul Wilkins's avatar
Paul Wilkins committed
361
      int band = get_coef_band(band_translate, c);
362
363
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
364

365
      cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
366

367
      if (!c || token_cache[scan[c - 1]])
368
        cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
369
      token_cache[scan[c]] = t;
370
    }
371
372
373
374
    if (c < seg_eob) {
      if (c)
        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
      cost += mb->token_costs[tx_size][type][ref]
Paul Wilkins's avatar
Paul Wilkins committed
375
          [get_coef_band(band_translate, c)]
376
377
          [pt][DCT_EOB_TOKEN];
    }
378
379
  }

380
381
382
  // is eob first coefficient;
  for (pt = 0; pt < (1 << tx_size); pt++) {
    A[pt] = L[pt] = c > 0;
383
  }
384

385
386
387
  return cost;
}

388
/* Picks the transform size for the current block from per-size RD data.
 *
 * Inputs, indexed by transform size up to max_txfm_size: r[n][0] is the
 * coefficient rate, d[n] the distortion and s[n] the skip flag.  r[n][1] is
 * filled in here as r[n][0] plus the cost of signalling size n.
 *
 * Outputs: mbmi->txfm_size is set according to cm->txfm_mode (the lowest RD
 * cost when the mode is TX_MODE_SELECT), *rate / *distortion / *skip
 * receive the values of the chosen size, and txfm_cache[] receives the RD
 * cost that each transform mode would have produced. */
static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                     int (*r)[2], int *rate,
                                     int *d, int *distortion,
                                     int *s, int *skip,
                                     int64_t txfm_cache[NB_TXFM_MODES],
                                     TX_SIZE max_txfm_size) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
  vp9_prob skip_prob = vp9_get_pred_prob(cm, xd, PRED_MBSKIP);
  int64_t rd[TX_SIZE_MAX_SB][2];
  int no_skip_cost, skip_cost;
  int size, smaller;
  TX_SIZE select_size, chosen_size;

  // Rate including the size signalling: a "one" for every smaller size
  // and, unless this is the largest allowed size, a closing "zero".
  for (size = TX_4X4; size <= max_txfm_size; ++size) {
    r[size][1] = r[size][0];
    for (smaller = 0; smaller < size; ++smaller)
      r[size][1] += vp9_cost_one(cm->prob_tx[smaller]);
    if (size != max_txfm_size)
      r[size][1] += vp9_cost_zero(cm->prob_tx[size]);
  }

  assert(skip_prob > 0);
  no_skip_cost = vp9_cost_bit(skip_prob, 0);
  skip_cost = vp9_cost_bit(skip_prob, 1);

  // rd[size][0]: cost without size signalling, rd[size][1]: with it.
  for (size = TX_4X4; size <= max_txfm_size; ++size) {
    if (s[size]) {
      rd[size][0] = rd[size][1] =
          RDCOST(x->rdmult, x->rddiv, skip_cost, d[size]);
    } else {
      rd[size][0] = RDCOST(x->rdmult, x->rddiv,
                           r[size][0] + no_skip_cost, d[size]);
      rd[size][1] = RDCOST(x->rdmult, x->rddiv,
                           r[size][1] + no_skip_cost, d[size]);
    }
  }

  // Size that wins when the size is signalled per block.  A larger size
  // must be strictly cheaper than every smaller one; 4x4 wins ties with 8x8.
  if (max_txfm_size == TX_32X32 &&
      rd[TX_32X32][1] < rd[TX_16X16][1] &&
      rd[TX_32X32][1] < rd[TX_8X8][1] &&
      rd[TX_32X32][1] < rd[TX_4X4][1]) {
    select_size = TX_32X32;
  } else if (max_txfm_size >= TX_16X16 &&
             rd[TX_16X16][1] < rd[TX_8X8][1] &&
             rd[TX_16X16][1] < rd[TX_4X4][1]) {
    select_size = TX_16X16;
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    select_size = TX_8X8;
  } else {
    select_size = TX_4X4;
  }

  // Size actually used under the frame's transform mode.
  switch (cm->txfm_mode) {
    case TX_MODE_SELECT:
      chosen_size = select_size;
      break;
    case ALLOW_32X32:
      if (max_txfm_size == TX_32X32)
        chosen_size = TX_32X32;
      else if (max_txfm_size >= TX_16X16)
        chosen_size = TX_16X16;
      else
        chosen_size = TX_8X8;
      break;
    case ALLOW_16X16:
      chosen_size = (max_txfm_size >= TX_16X16) ? TX_16X16 : TX_8X8;
      break;
    case ALLOW_8X8:
      chosen_size = TX_8X8;
      break;
    default:
      chosen_size = TX_4X4;
      break;
  }
  mbmi->txfm_size = chosen_size;

  *distortion = d[chosen_size];
  *rate       = r[chosen_size][cm->txfm_mode == TX_MODE_SELECT];
  *skip       = s[chosen_size];

  txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
  txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
  txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
  txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
  txfm_cache[TX_MODE_SELECT] = rd[select_size][1];
}

467
468
/* Returns the sum of squared differences between the first `block_size`
 * entries of `coeff` and `dqcoeff`, shifted right by `shift` bits and
 * saturated at INT_MAX.  The sum is kept in 64 bits before the shift. */
static int block_error(int16_t *coeff, int16_t *dqcoeff,
                       int block_size, int shift) {
  int64_t sse = 0;
  int idx = 0;

  while (idx < block_size) {
    const int64_t delta = coeff[idx] - dqcoeff[idx];
    sse += delta * delta;
    ++idx;
  }
  sse >>= shift;

  if (sse > INT_MAX)
    return INT_MAX;
  return (int)sse;
}

481
482
/* Returns block_error() over plane 0 of a block of size `bsize`:
 * 16 coefficients for each of the (1 << bwl) * (1 << bhl) units given by
 * b_width_log2() / b_height_log2(), shifted right by `shift`. */
static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int log2_units = b_width_log2(bsize) + b_height_log2(bsize);
  const int num_coeffs = 16 << log2_units;

  return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
                     num_coeffs, shift);
}
486

487
488
489
490
/* Returns the combined block_error() of planes 1 .. MAX_MB_PLANE - 1 for a
 * block of size `bsize`.  Each plane's coefficient count is reduced by its
 * subsampling_x + subsampling_y; the per-plane errors are summed unshifted,
 * then the total is shifted right by `shift` and saturated at INT_MAX. */
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
  const int log2_units = b_width_log2(bsize) + b_height_log2(bsize);
  int64_t total = 0;
  int plane;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    const int ss = x->e_mbd.plane[plane].subsampling_x +
                   x->e_mbd.plane[plane].subsampling_y;
    const int num_coeffs = 16 << (log2_units - ss);

    total += block_error(x->plane[plane].coeff,
                         x->e_mbd.plane[plane].dqcoeff,
                         num_coeffs, 0);
  }
  total >>= shift;

  if (total > INT_MAX)
    return INT_MAX;
  return (int)total;
}

502
503
/* Returns the total coefficient cost of one plane of a block of size
 * `bsize` coded with transform size `tx_size`.
 *
 * Works on local copies of the plane's above/left entropy contexts, so the
 * contexts stored in xd are not modified; the copies are updated by
 * cost_coeffs() as each transform block is costed. */
static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
                        int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
  const int bw = 1 << bwl;
  const int bh = 1 << bhl;
  const int num_blocks = bw * bh;
  const int tx_step = 1 << (tx_size * 2);  // block units per transform
  ENTROPY_CONTEXT t_above[16], t_left[16];
  int total_cost = 0;
  int block;

  vpx_memcpy(t_above, xd->plane[plane].above_context,
             sizeof(ENTROPY_CONTEXT) * bw);
  vpx_memcpy(t_left, xd->plane[plane].left_context,
             sizeof(ENTROPY_CONTEXT) * bh);

  for (block = 0; block < num_blocks; block += tx_step) {
    int x_idx, y_idx;

    txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
                             &x_idx, &y_idx);

    total_cost += cost_coeffs(cm, x, plane, block,
                              xd->plane[plane].plane_type,
                              t_above + x_idx, t_left + y_idx,
                              tx_size, num_blocks);
  }

  return total_cost;
}

531
532
533
/* Returns the sum of rdcost_plane() over planes 1 .. MAX_MB_PLANE - 1. */
static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  int total_cost = 0;
  int plane = 1;

  while (plane < MAX_MB_PLANE) {
    total_cost += rdcost_plane(cm, x, plane, bsize, tx_size);
    ++plane;
  }

  return total_cost;
}

541
542
543
/* Evaluates plane 0 of the current block with transform size `tx_size`.
 *
 * Stores tx_size in the block's mode info, runs vp9_xform_quant_sby(), and
 * reports the resulting distortion (block error, shifted right by 2 except
 * for TX_32X32), coefficient rate and skippable flag. */
static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                     int *rate, int *distortion, int *skippable,
                                     BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int dist_shift = (tx_size == TX_32X32) ? 0 : 2;

  xd->mode_info_context->mbmi.txfm_size = tx_size;
  vp9_xform_quant_sby(cm, x, bsize);

  *distortion = block_error_sby(x, bsize, dist_shift);
  *rate       = rdcost_plane(cm, x, 0, bsize, tx_size);
  *skippable  = vp9_sby_is_skippable(xd, bsize);
}

553
554
static void super_block_yrd(VP9_COMP *cpi,
                            MACROBLOCK *x, int *rate, int *distortion,
555
                            int *skip, BLOCK_SIZE_TYPE bs,
556
                            int64_t txfm_cache[NB_TXFM_MODES]) {
557
558
  VP9_COMMON *const cm = &cpi->common;
  int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
559

560
  vp9_subtract_sby(x, bs);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
561

562
  if (bs >= BLOCK_SIZE_SB32X32)
563
564
    super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
                             bs, TX_32X32);
565
  if (bs >= BLOCK_SIZE_MB16X16)
566
567
568
569
570
571
    super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
                             bs, TX_16X16);
  super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
                           TX_8X8);
  super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
                           TX_4X4);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
572
573

  choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
574
                           TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
Jingning Han's avatar
Jingning Han committed
575
                           - (bs < BLOCK_SIZE_MB16X16));
Ronald S. Bultje's avatar
Ronald S. Bultje committed
576
}
Ronald S. Bultje's avatar
Ronald S. Bultje committed
577

578
579
/* Searches all intra modes (DC_PRED .. TM_PRED) for the sub-8x8 partition
 * starting at 4x4 block `ib` and keeps the one with the lowest RD cost.
 *
 * The partition covers bw x bh 4x4 blocks as given by `bsize`.  For each
 * candidate mode every 4x4 block is predicted, transformed, quantized,
 * costed and reconstructed.
 *
 * Outputs for the winning mode: *best_mode, *bestrate (mode + coefficient
 * rate), *bestratey (coefficient rate only), *bestdistortion, and the
 * updated entropy contexts in `a` / `l` (two entries each are copied).  The
 * destination buffer is left holding the winner's reconstruction and
 * bmi[].as_mode.first holds the winning mode for every covered block.
 *
 * Returns the RD cost of the winning mode. */
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     B_PREDICTION_MODE *best_mode,
                                     int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int *bestdistortion,
                                     BLOCK_SIZE_TYPE bsize) {
  MB_PREDICTION_MODE mode;
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t best_rd = INT64_MAX;
  int rate = 0;
  int distortion;
  VP9_COMMON *const cm = &cpi->common;
  const int src_stride = x->plane[0].src.stride;
  uint8_t *src, *dst;
  int16_t *src_diff, *coeff;

  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  TX_TYPE tx_type = DCT_DCT;
  TX_TYPE best_tx_type = DCT_DCT;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy, block;
  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);

  assert(ib < 4);

  // Snapshot the incoming contexts; every mode trial starts from them.
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mode_info_context->mbmi.txfm_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;

    xd->mode_info_context->bmi[ib].as_mode.first = mode;
    if (cm->frame_type == KEY_FRAME)
      rate = bmode_costs[mode];
    else
      rate = x->mbmode_cost[cm->frame_type][mode];
    distortion = 0;

    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        block = ib + idy * 2 + idx;
        xd->mode_info_context->bmi[block].as_mode.first = mode;
        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        x->plane[0].src.buf, src_stride);
        src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                             x->plane[0].src_diff);
        coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                        xd->plane[0].dst.buf,
                                        xd->plane[0].dst.stride);
        vp9_intra4x4_predict(xd, block,
                             BLOCK_SIZE_SB8X8,
                             mode, dst, xd->plane[0].dst.stride);
        vp9_subtract_block(4, 4, src_diff, 8,
                           src, src_stride,
                           dst, xd->plane[0].dst.stride);

        tx_type = get_tx_type_4x4(xd, block);
        if (tx_type != DCT_DCT)
          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
        else
          x->fwd_txm4x4(src_diff, coeff, 16);
        x->quantize_b_4x4(x, block, tx_type, 16);

        ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
                             tempa + idx, templ + idy, TX_4X4, 16);
        distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
                                                         block, 16), 16) >> 2;

        // Reconstruct this block with the mode under test.  dst still holds
        // the prediction made for `mode` above, so only the residual is
        // added, using the transform type chosen for this trial.  Using
        // *best_mode / best_tx_type here would mix a different (possibly
        // not yet assigned) prediction with this trial's residual.
        // Presumably the reconstruction is needed so that the remaining
        // 4x4 blocks of the partition predict from reconstructed pixels.
        if (tx_type != DCT_DCT)
          vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                               dst, xd->plane[0].dst.stride, tx_type);
        else
          xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                             dst, xd->plane[0].dst.stride);
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      best_tx_type = tx_type;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      // Keep the winner's dequantized coefficients for the final
      // reconstruction below.
      for (idy = 0; idy < bh; ++idy) {
        for (idx = 0; idx < bw; ++idx) {
          block = ib + idy * 2 + idx;
          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
                     BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
                     sizeof(best_dqcoeff[0]));
        }
      }
    }
  }

  // Rebuild the destination with the winning mode, since later trials have
  // overwritten it.
  for (idy = 0; idy < bh; ++idy) {
    for (idx = 0; idx < bw; ++idx) {
      block = ib + idy * 2 + idx;
      xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
                                      xd->plane[0].dst.buf,
                                      xd->plane[0].dst.stride);

      vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
                           dst, xd->plane[0].dst.stride);
      // inverse transform
      if (best_tx_type != DCT_DCT)
        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                             xd->plane[0].dst.stride, best_tx_type);
      else
        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
                           xd->plane[0].dst.stride);
    }
  }

  return best_rd;
}

715
716
/* Picks an intra prediction mode for every sub-block of the current block
 * when it is coded as I4X4_PRED, and accumulates the resulting cost.
 *
 * The 2x2 grid of block positions is walked in steps of bw/bh (derived from
 * the block's sb_type), and rd_pick_intra4x4block() is called once per step.
 * The chosen mode is replicated into every bmi[] entry covered by that step.
 *
 * cpi, mb    - encoder and macroblock state, forwarded to the per-block search.
 * Rate       - out: total rate of all sub-blocks (written on success only).
 * rate_y     - out: sum of the per-block "ry" rates (written on success only).
 * Distortion - out: total distortion (written on success only).
 * best_rd    - RD budget; the search is abandoned once the running sum of
 *              per-block RD costs reaches it.
 *
 * Returns RDCOST() of the accumulated rate and distortion, or INT64_MAX when
 * the budget was reached (in which case the out-parameters are untouched).
 */
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
                                         int *Rate, int *rate_y,
                                         int *Distortion, int64_t best_rd) {
  int i, j;
  MACROBLOCKD *const xd = &mb->e_mbd;
  BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
  int bw = 1 << b_width_log2(bsize);
  int bh = 1 << b_height_log2(bsize);
  int idx, idy;
  int cost = 0;
  int distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int *bmode_costs;

  /* Work on private copies of the entropy contexts so the search does not
   * disturb the ones stored in the plane. */
  vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  xd->mode_info_context->mbmi.mode = I4X4_PRED;
  /* Default cost table; replaced per block by a context-dependent table on
   * key frames (see below). */
  bmode_costs = mb->inter_bmode_costs;

  for (idy = 0; idy < 2; idy += bh) {
    for (idx = 0; idx < 2; idx += bw) {
      MODE_INFO *const mic = xd->mode_info_context;
      const int mis = xd->mode_info_stride;
      B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
      int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
      int UNINITIALIZED_IS_SAFE(d);
      i = idy * 2 + idx;

      if (xd->frame_type == KEY_FRAME) {
        /* On key frames the mode cost depends on the above and left modes.
         * The left neighbour is usable if it lies inside this block
         * (idx != 0) or the block to the left is available. */
        const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
        const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                     left_block_mode(mic, i) : DC_PRED;

        bmode_costs  = mb->bmode_costs[A][L];
      }

      total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
                                        t_above + idx, t_left + idy,
                                        &r, &ry, &d, bsize);
      cost += r;
      distortion += d;
      tot_rate_y += ry;

      /* Replicate the chosen mode over every bmi entry this step covers. */
      mic->bmi[i].as_mode.first = best_mode;
      for (j = 1; j < bh; ++j)
        mic->bmi[i + j * 2].as_mode.first = best_mode;
      for (j = 1; j < bw; ++j)
        mic->bmi[i + j].as_mode.first = best_mode;

      /* Budget reached: give up immediately. A plain `break` here would only
       * leave the inner loop, so the outer loop would go on to search the
       * next row and keep adding to total_rd even though the result is
       * already decided. */
      if (total_rd >= best_rd)
        return INT64_MAX;
    }
  }

  *Rate = cost;
  *rate_y = tot_rate_y;
  *Distortion = distortion;

  return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
781

782
783
784
785
static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int *distortion, int *skippable,
                                      BLOCK_SIZE_TYPE bsize,
786
                                      int64_t txfm_cache[NB_TXFM_MODES]) {
Ronald S. Bultje's avatar
Ronald S. Bultje committed
787
788
  MB_PREDICTION_MODE mode;
  MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
789
  MACROBLOCKD *xd = &x->e_mbd;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
790
  int this_rate, this_rate_tokenonly;
791
  int this_distortion, s;
792
  int64_t best_rd = INT64_MAX, this_rd;
793
794
  TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
  int i;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
795

796
797
798
799
800
  if (bsize < BLOCK_SIZE_SB8X8) {
    x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
    return best_rd;
  }

801
802
  for (i = 0; i < NB_TXFM_MODES; i++)
    txfm_cache[i] = INT64_MAX;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
803
804
805

  /* Y Search for 32x32 intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
806
    int64_t local_txfm_cache[NB_TXFM_MODES];
807
808
809
810
811
812
813
    MODE_INFO *const mic = xd->mode_info_context;
    const int mis = xd->mode_info_stride;
    const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
    const MB_PREDICTION_MODE L = xd->left_available ?
                                 left_block_mode(mic, 0) : DC_PRED;

    int *bmode_costs  = x->bmode_costs[A][L];
814

Ronald S. Bultje's avatar
Ronald S. Bultje committed
815
    x->e_mbd.mode_info_context->mbmi.mode = mode;
816
    vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
Ronald S. Bultje's avatar
Ronald S. Bultje committed
817

818
819
    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
                    bsize, local_txfm_cache);
820
    this_rate = this_rate_tokenonly + bmode_costs[mode];
Ronald S. Bultje's avatar
Ronald S. Bultje committed
821
822
823
824
825
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
      mode_selected   = mode;
      best_rd         = this_rd;
826
      best_tx         = x->e_mbd.mode_info_context->mbmi.txfm_size;
Ronald S. Bultje's avatar
Ronald S. Bultje committed
827
828
829
830
831
      *rate           = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion     = this_distortion;
      *skippable      = s;
    }
832

833
834
    for (i = 0; i < NB_TXFM_MODES; i++) {
      int64_t adj_rd = this_rd + local_txfm_cache[i] -
835
                       local_txfm_cache[cpi->common.txfm_mode];
836
837
      if (adj_rd < txfm_cache[i]) {
        txfm_cache[i] = adj_rd;
John Koleszar's avatar
John Koleszar committed
838
      }
John Koleszar's avatar
John Koleszar committed
839
    }
John Koleszar's avatar
John Koleszar committed
840
  }
John Koleszar's avatar
John Koleszar committed
841

842
843
  x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
  x->e_mbd.mode_info_context->mbmi.txfm_size = best_tx;
844

John Koleszar's avatar
John Koleszar committed
845
  return best_rd;
John Koleszar's avatar
John Koleszar committed
846
}
Jingning Han's avatar
Jingning Han committed
847

848
849
850
851
static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
                                      int *rate, int *distortion,
                                      int *skippable, BLOCK_SIZE_TYPE bsize,
                                      TX_SIZE uv_tx_size) {
852
  MACROBLOCKD *const xd = &x->e_mbd;