vp9_rdopt.c 155 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
 */

#include <assert.h>
12
#include <math.h>
13

14
15
16
17
18
19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21
22
23
24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25
26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27
28
29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38
39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
// Rate-distortion threshold tuning constants.
// NOTE(review): neither constant is referenced in the part of the file
// reviewed here; presumably a cap and a step for an adaptive threshold
// factor -- confirm against their users.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
43

44
45
46
47
48
49
50
51
// Reference-frame bit masks: bit n stands for reference-frame value n.
// Each *_MODE_MASK sets the bit for INTRA_FRAME and for every inter
// reference frame other than the one the macro is named after.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// Sets the ALTREF_FRAME bit plus bit 0 (the literal 0x01).
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
52

Paul Wilkins's avatar
Paul Wilkins committed
53
54
// NOTE(review): not referenced in the part of the file reviewed here;
// presumably the lowest mode index at which early termination may trigger --
// confirm against its users.
#define MIN_EARLY_TERM_INDEX    3

55
typedef struct {
56
  PREDICTION_MODE mode;
57
58
59
60
61
62
63
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference search order: the reference frame pair to try.
// ref_frame[1] is NONE for intra and single-reference entries (see the
// vp9_ref_order table).
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
64
65
66
67
68
69
70
71
72
73
74
75
76
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
77
  int use_fast_coef_costing;
78
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
79
80
};

81
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
82
83
84
85
86
87
88
89
90
91
92
93
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
94
95
96
97
98
99
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
121
122
};

123
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
124
125
126
127
128
129
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
130
131
};

132
133
// Returns the sample offset of 4x4 block `raster_block` within a buffer of
// row pitch `stride`. The block index is split into a row (the bits above
// b_width_log2_lookup[plane_bsize]) and a column (the bits below it); both
// are scaled by 4 samples.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int bw = b_width_log2_lookup[plane_bsize];
  const int y = 4 * (raster_block >> bw);
  const int x = 4 * (raster_block & ((1 << bw) - 1));
  return y * stride + x;
}
// Returns a pointer to 4x4 block `raster_block` inside the int16_t buffer
// `base`, whose row pitch is taken to be the width of `plane_bsize` in
// samples (4 * number of 4x4 columns).
static int16_t *raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  return base + raster_block_offset(plane_bsize, raster_block, stride);
}

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// Exchanges the coefficient buffer sets stored in slots `m` and `n` of `ctx`
// for every plane in [min_plane, max_plane). The live plane pointers of `x`
// serve as the swap temporaries, so on return they reference the buffers
// that started in slot `m` (and now sit in slot `n`).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Transform coefficients.
    mb_plane->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = mb_plane->coeff;

    // Quantized coefficients.
    mb_plane->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = mb_plane->qcoeff;

    // Dequantized coefficients.
    mbd_plane->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = mbd_plane->dqcoeff;

    // End-of-block positions.
    mb_plane->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = mb_plane->eobs;
  }
}

170
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
171
172
173
174
175
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
176
177
178
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
179
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
180
  unsigned int sse;
181
  unsigned int var = 0;
182
  unsigned int sum_sse = 0;
183
  const int shift = 6;
184
185
186
187
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
188
189
190
191

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
192
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
193
194
195
196
197
198
199
200
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

201
    sum_sse = 0;
202
203
204
205
206

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
207
208
209
210
211
212
213
        int block_idx = (idy << 1) + idx;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

214
        x->skip_txfm[(i << 2) + block_idx] = 0;
215
        if (!x->select_tx_size) {
216
217
          // Check if all ac coefficients can be quantized to zero.
          if (var < p->quant_thred[1] >> shift) {
218
            x->skip_txfm[(i << 2) + block_idx] = 2;
219
220
221
222
223

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < p->quant_thred[0] >> shift)
              x->skip_txfm[(i << 2) + block_idx] = 1;
          }
224
        }
225
226
227
228
229

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
230

231
    // Fast approximate the modelling function.
232
    if (cpi->oxcf.speed > 4) {
233
      int64_t rate;
234
      const int64_t square_error = sum_sse;
235
      int quantizer = (pd->dequant[1] >> 3);
236
237
238
239
240
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
241

242
243
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
244
245
246
247
248
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
249
    } else {
250
251
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Deb Mukherjee's avatar
Deb Mukherjee committed
252
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
253
254
255
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
Deb Mukherjee's avatar
Deb Mukherjee committed
256
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
257
258
259
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
260
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
261
                                   pd->dequant[1] >> 3, &rate, &dist);
262
#endif  // CONFIG_VP9_HIGHBITDEPTH
263
      rate_sum += rate;
264
      dist_sum += dist;
265
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
266
267
  }

268
269
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
270
271
}

272
// Computes the squared error between transform coefficients and their
// dequantized reconstruction.
//
//   coeff, dqcoeff: arrays of `block_size` coefficients.
//   ssz:            receives the sum of squared `coeff` values.
// Returns the sum of squared (coeff - dqcoeff) differences.
//
// All products are formed in 64 bits: with 16-bit coefficients the
// difference can reach +/-65535, whose square does not fit in a 32-bit int
// (signed overflow is undefined behavior).
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311

#if CONFIG_VP9_HIGHBITDEPTH
// High-bit-depth variant of the block error: sums squared coefficient
// differences and squared coefficients over `block_size` entries, then
// scales both sums down by 2 * (bd - 8) bits with rounding. The scaled
// squared-coefficient sum is stored in *ssz; the scaled error is returned.
int64_t vp9_high_block_error_c(const tran_low_t *coeff,
                               const tran_low_t *dqcoeff,
                               intptr_t block_size,
                               int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (shift > 0) ? (1 << (shift - 1)) : 0;
  int64_t err_sum = 0;
  int64_t coeff_sq_sum = 0;
  int k;

  for (k = 0; k < block_size; ++k) {
    const int64_t delta = coeff[k] - dqcoeff[k];
    const int64_t value = (int64_t)coeff[k];

    err_sum += delta * delta;
    coeff_sq_sum += value * value;
  }
  assert(err_sum >= 0 && coeff_sq_sum >= 0);

  *ssz = (coeff_sq_sum + rounding) >> shift;
  return (err_sum + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

312
313
314
315
316
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
317
static const int16_t band_counts[TX_SIZES][8] = {
318
319
320
321
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
322
};
323
static INLINE int cost_coeffs(MACROBLOCK *x,
324
                              int plane, int block,
325
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
326
                              TX_SIZE tx_size,
327
328
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
329
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
330
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
331
332
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
333
  const PLANE_TYPE type = pd->plane_type;
334
  const int16_t *band_count = &band_counts[tx_size][1];
335
  const int eob = p->eobs[block];
336
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
337
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
338
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
339
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
340
  int pt = combine_entropy_contexts(*A, *L);
341
  int c, cost;
342
  // Check for consistency of tx_size with mode info
343
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
344
                              : get_uv_tx_size(mbmi, pd) == tx_size);
345

346
347
  if (eob == 0) {
    // single eob token
348
    cost = token_costs[0][0][pt][EOB_TOKEN];
349
    c = 0;
350
  } else {
351
    int band_left = *band_count++;
352
353

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
354
    int v = qcoeff[0];
355
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
356
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
357
    token_cache[0] = vp9_pt_energy_class[prev_t];
358
    ++token_costs;
359
360
361
362

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
363
      int t;
364

Dmitry Kovalev's avatar
Dmitry Kovalev committed
365
      v = qcoeff[rc];
366
      t = vp9_dct_value_tokens_ptr[v].token;
367
368
369
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
370
        pt = get_coef_context(nb, token_cache, c);
371
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
372
        token_cache[rc] = vp9_pt_energy_class[t];
373
      }
374
      prev_t = t;
375
      if (!--band_left) {
376
377
        band_left = *band_count++;
        ++token_costs;
378
      }
379
    }
380
381

    // eob token
382
    if (band_left) {
383
384
385
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
386
        pt = get_coef_context(nb, token_cache, c);
387
388
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
389
    }
390
391
  }

392
  // is eob first coefficient;
393
  *A = *L = (c > 0);
394

395
396
  return cost;
}
397
398
399
400
401

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
402
403
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
404
#endif  // CONFIG_VP9_HIGHBITDEPTH
405
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
406
407
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
408
409
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
410
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
411
  int shift = tx_size == TX_32X32 ? 0 : 2;
412
413
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
414
415
416
417
#if CONFIG_VP9_HIGHBITDEPTH
  args->dist = vp9_high_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                    &this_sse, bd) >> shift;
#else
418
419
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
420
#endif  // CONFIG_VP9_HIGHBITDEPTH
421
  args->sse  = this_sse >> shift;
422

hkuang's avatar
hkuang committed
423
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
424
425
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
426
                    (1 << ss_txfrm_size)) >> (shift + 2);
427
428
429
430
431
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
432
433
    args->dist += (p >> 4);
    args->sse  += p;
434
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
435
436
}

437
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
438
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
439
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
440
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
441

442
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
443
                           args->t_left + y_idx, tx_size,
444
445
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
446
447
}

448
449
// Per-transform-block callback (arg is a struct rdcost_block_args *).
// Produces rate, distortion and SSE for one transform block and adds them
// to the running totals in `args`. Does nothing once args->skip is set, and
// sets args->skip when the accumulated RD cost exceeds args->best_rd.
//
// Intra blocks are fully encoded. Inter blocks at the largest transform
// size honor x->skip_txfm[]: 0 = full transform and quantization, 2 = DC
// only, anything else = no transform (distortion equals the stored SSE).
// Other inter blocks always get the full transform and quantization.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi)) {
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
    dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Index shared by x->skip_txfm[] and x->bsse[] for this transform unit.
    const int unit_idx = (plane << 2) + (block >> (tx_size << 1));

    if (x->skip_txfm[unit_idx] == 0) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dist_block(plane, block, tx_size, args, xd->bd);
      } else {
        dist_block(plane, block, tx_size, args, 8);
      }
#else
      dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    } else if (x->skip_txfm[unit_idx] == 2) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      args->sse  = x->bsse[unit_idx] << 4;
      args->dist = args->sse;
      if (x->plane[plane].eobs[block]) {
        // Distortion removed by coding the DC coefficient. Computed in
        // 64 bits so the squares cannot overflow int.
        const int64_t dc = coeff[0];
        const int64_t dc_err = dc - dqcoeff[0];
        int64_t dc_correct = dc * dc - dc_err * dc_err;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        args->dist = MAX(0, args->sse - dc_correct);
      }
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      args->sse  = x->bsse[unit_idx] << 4;
      args->dist = args->sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
    dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of dropping them all.
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Over budget: tell later invocations to stop.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

542
static void txfm_rd_in_plane(MACROBLOCK *x,
543
544
545
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
546
547
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
548
  MACROBLOCKD *const xd = &x->e_mbd;
549
  const struct macroblockd_plane *const pd = &xd->plane[plane];
550
551
  struct rdcost_block_args args;
  vp9_zero(args);
552
553
  args.x = x;
  args.best_rd = ref_best_rd;
554
  args.use_fast_coef_costing = use_fast_coef_casting;
555

556
  if (plane == 0)
hkuang's avatar
hkuang committed
557
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
558

559
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
560

561
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
562

563
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
564
565
                                         block_rd_txfm, &args);
  if (args.skip) {
566
567
568
569
570
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
571
572
573
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
574
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
575
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
576
577
}

578
579
580
581
582
// Sets the luma transform size to the largest one allowed by both the block
// size `bs` and the frame's tx_mode, then evaluates plane 0 with it.
// Outputs are those of txfm_rd_in_plane().
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;

  mbmi->tx_size = MIN(max_tx_size, largest_tx_size);

  txfm_rd_in_plane(x, rate, distortion, skip,
                   sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}

596
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
597
598
599
600
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
601
                                   int64_t tx_cache[TX_MODES],
602
                                   int64_t ref_best_rd,
603
                                   BLOCK_SIZE bs) {
604
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
605
606
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
607
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
608
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
609
610
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
611
612
613
614
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
615
  int n, m;
616
  int s0, s1;
617
618
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
619
  TX_SIZE best_tx = max_tx_size;
620

621
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
622
623
624
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
625

626
  for (n = max_tx_size; n >= 0;  n--) {
627
628
629
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
630
631
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
632
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
633
634
635
636
637
638
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
639
640
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
641
    } else if (s[n]) {
642
643
644
645
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
646
    }
647

648
649
650
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
651
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
652
653
654
        s[n] == 1))
      break;

655
656
657
658
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
659
  }
660
661
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
662
663


664
665
666
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
667
  *psse       = sse[mbmi->tx_size];
668

669
670
671
672
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
673

674
675
676
677
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
678
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
679
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
680
  } else {
681
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
682
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
683
}
684

685
686
687
688
689
// Computes luma rate/distortion for block size `bs`, choosing the transform
// size on the way.
//
// When the speed feature selects USE_LARGESTALL or the block is lossless,
// the largest allowed transform is used and txfm_cache is zeroed; otherwise
// the size comes from an RD search that also fills txfm_cache.
// `psse` may be NULL if the caller does not need the SSE.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  // The helpers always write an SSE; point them at a local when the caller
  // passed no destination.
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
                           txfm_cache, ref_best_rd, bs);
  }
}

706
707
// Returns 1 when the oblique intra mode `mode` should be skipped because
// the best intra mode found so far is neither of the two modes associated
// with it below; returns 0 for all other modes.
//
//   D117_PRED: kept only if best is V_PRED or D135_PRED
//   D63_PRED:  kept only if best is V_PRED or D45_PRED
//   D207_PRED: kept only if best is H_PRED or D45_PRED
//   D153_PRED: kept only if best is H_PRED or D135_PRED
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

727
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
728
                                     PREDICTION_MODE *best_mode,
729
                                     const int *bmode_costs,
730
731
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
732
                                     int64_t *bestdistortion,
733
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
734
  PREDICTION_MODE mode;
735
  MACROBLOCKD *const xd = &x->e_mbd;
736
  int64_t best_rd = rd_thresh;
737

738
739
740
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
741
  const int dst_stride = pd->dst.stride;
742
743
744
745
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
746
747
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
748

Jim Bankoski's avatar
Jim Bankoski committed
749
750
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
751
  int idx, idy;
752
  uint8_t best_dst[8 * 8];
753
754
755
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
756

Jingning Han's avatar
Jingning Han committed
757
  assert(ib < 4);
758

759
760
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
761
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
762

763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
          vp9_high_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                  dst, dst_stride, xd->bd);
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
            vp9_high_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_high_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                 dst, dst_stride,
                                 p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
            vp9_high_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            distortion += vp9_high_block_error(coeff,
                                               BLOCK_OFFSET(pd->dqcoeff, block),
                                               16, &unused, xd->bd) >> 2;
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_high_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                dst, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        vpx_memcpy(a, tempa, sizeof(tempa));
        vpx_memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          vpx_memcpy(best_dst16 + idy * 8,
                     CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                     num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 best_dst16 + idy * 8,
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

865
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
866
    int64_t this_rd;
867
    int ratey = 0;
868
869
    int64_t distortion = 0;
    int rate = bmode_costs[mode];
870

871
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
872
873
      continue;

874
875
    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
876
877
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
878
879
          continue;
    }
880

881
882
883
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

Jim Bankoski's avatar
Jim Bankoski committed
884
885
    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
886
        const int block = ib + idy * 2 + idx;
887
888
889
890
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
891
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
hkuang's avatar
hkuang committed
892
        xd->mi[0].src_mi->bmi[block].as_mode = mode;
893
        vp9_predict_intra_block(xd, block, 1,
894
                                TX_4X4, mode,
895
896
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
897
                                dst, dst_stride, idx, idy, 0);
898
899
900
901
902
903
904
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
905
906
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
907
908
909
910
911
912
913
914
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
915
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
916
917
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
918
919
                             so->scan, so->neighbors,
                             cpi->sf.use_fast_coef_costing);
920
921
922
923
924
925
926
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
927
928
      }
    }
Jingning Han's avatar
Jingning Han committed
929

930
931
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
932