vp9_rdopt.c 154 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
 */

#include <assert.h>
12
#include <math.h>
13

14
15
16
17
18
19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21
22
23
24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25
26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27
28
29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38
39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
43

44
45
46
47
48
49
50
51
// Bitmasks over reference-frame values (bit n corresponds to frame value n).
// Each *_MODE_MASK sets the bits of every frame other than the one it is
// named after: the two remaining inter references plus INTRA_FRAME.
// NOTE(review): presumably these list the frames to skip when only the named
// reference is searched -- the users are outside this chunk, confirm.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// The ALTREF_FRAME bit plus bit 0.
// NOTE(review): bit 0 presumably stands for INTRA_FRAME -- confirm against
// the MV_REFERENCE_FRAME enum.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
52

Paul Wilkins's avatar
Paul Wilkins committed
53
54
#define MIN_EARLY_TERM_INDEX    3

55
typedef struct {
56
  PREDICTION_MODE mode;
57
58
59
60
61
62
63
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of vp9_ref_order[]: a reference frame pair with no prediction
// mode attached. ref_frame[1] is NONE for single-reference and intra entries.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
64
65
66
67
68
69
70
71
72
73
74
75
76
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
77
  int use_fast_coef_costing;
78
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
79
80
};

81
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
82
83
84
85
86
87
88
89
90
91
92
93
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
94
95
96
97
98
99
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
121
122
};

123
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
124
125
126
127
128
129
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
130
131
};

132
133
134
135
136
137
138
139
140
// Converts a raster-order 4x4 block index inside a plane block into an
// element offset for a buffer with the given stride. The row and column of
// the 4x4 block are split out of raster_block using the log2 width returned
// by b_width_log2(), then scaled by 4.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2(plane_bsize);
  const int col_mask = (1 << width_log2) - 1;
  const int row = raster_block >> width_log2;
  const int col = raster_block & col_mask;

  return (4 * row) * stride + (4 * col);
}
// Returns a pointer to the 4x4 block raster_block inside an int16_t buffer
// whose stride is four times the number of 4x4 columns of plane_bsize.
static int16_t *raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block, stride);

  return &base[offset];
}

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// For every plane in [min_plane, max_plane), exchanges the coefficient
// buffer sets stored at slots m and n of the pick-mode context and leaves
// the plane's working pointers (coeff, qcoeff, dqcoeff, eobs) pointing at the
// buffers that were in slot m before the call (they end up in slot n).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];

    // Each buffer kind is swapped on its own; the plane pointer doubles as
    // the temporary that carries the old slot-m buffer.
    p->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = p->coeff;

    p->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = p->qcoeff;

    pd->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = pd->dqcoeff;

    p->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = p->eobs;
  }
}

170
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
171
172
173
174
175
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
176
177
178
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
179
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
180
  unsigned int sse;
181
  unsigned int var = 0;
182
  unsigned int sum_sse = 0;
183
  const int shift = 6;
184
185
186
187
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
188
189
190
191

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
192
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
193
194
195
196
197
198
199
200
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

201
    sum_sse = 0;
202
203
204
205
206

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
207
208
209
210
211
212
213
214
        int block_idx = (idy << 1) + idx;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        if (!x->select_tx_size) {
215
216
          // Check if all ac coefficients can be quantized to zero.
          if (var < p->quant_thred[1] >> shift) {
217
            x->skip_txfm[(i << 2) + block_idx] = 2;
218
219
220
221
222

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < p->quant_thred[0] >> shift)
              x->skip_txfm[(i << 2) + block_idx] = 1;
          } else {
223
            x->skip_txfm[(i << 2) + block_idx] = 0;
224
          }
225
        }
226
227
228
229
230

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
231

232
    // Fast approximate the modelling function.
233
    if (cpi->oxcf.speed > 4) {
234
      int64_t rate;
235
236
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);
237
238
239
240
241
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
242

243
244
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
245
246
247
248
249
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
250
    } else {
251
252
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Deb Mukherjee's avatar
Deb Mukherjee committed
253
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
254
255
256
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
Deb Mukherjee's avatar
Deb Mukherjee committed
257
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
258
259
260
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
261
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
262
                                   pd->dequant[1] >> 3, &rate, &dist);
263
#endif  // CONFIG_VP9_HIGHBITDEPTH
264
      rate_sum += rate;
265
      dist_sum += dist;
266
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
267
268
  }

269
270
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
271
272
}

273
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size coefficients and stores the sum of squared coeff[] values in
// *ssz.
//
// All products are formed in 64 bits, as vp9_high_block_error_c() does, so a
// large coefficient or difference cannot overflow int arithmetic.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312

#if CONFIG_VP9_HIGHBITDEPTH
// High-bit-depth counterpart of vp9_block_error_c(): returns the sum of
// squared differences between coeff[] and dqcoeff[] and stores the sum of
// squared coeff[] values in *ssz. Both sums are rounded and shifted right by
// 2 * (bd - 8) before being returned.
int64_t vp9_high_block_error_c(const tran_low_t *coeff,
                               const tran_low_t *dqcoeff,
                               intptr_t block_size,
                               int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  int rounding = 0;
  int64_t sum_sq_diff = 0;
  int64_t sum_sq_coeff = 0;
  int i;

  // Half of the divisor implied by the shift; stays 0 when no shift applies.
  if (shift > 0)
    rounding = 1 << (shift - 1);

  for (i = 0; i < block_size; i++) {
    const tran_low_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    sum_sq_diff += diff * diff;
    sum_sq_coeff += (int64_t)c * (int64_t)c;
  }
  assert(sum_sq_diff >= 0 && sum_sq_coeff >= 0);

  *ssz = (sum_sq_coeff + rounding) >> shift;
  return (sum_sq_diff + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

313
314
315
316
317
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
318
static const int16_t band_counts[TX_SIZES][8] = {
319
320
321
322
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
323
};
324
static INLINE int cost_coeffs(MACROBLOCK *x,
325
                              int plane, int block,
326
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
327
                              TX_SIZE tx_size,
328
329
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
330
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
331
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
332
333
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
334
  const PLANE_TYPE type = pd->plane_type;
335
  const int16_t *band_count = &band_counts[tx_size][1];
336
  const int eob = p->eobs[block];
337
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
338
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
339
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
340
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
341
  int pt = combine_entropy_contexts(*A, *L);
342
  int c, cost;
343
  // Check for consistency of tx_size with mode info
344
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
345
                              : get_uv_tx_size(mbmi, pd) == tx_size);
346

347
348
  if (eob == 0) {
    // single eob token
349
    cost = token_costs[0][0][pt][EOB_TOKEN];
350
    c = 0;
351
  } else {
352
    int band_left = *band_count++;
353
354

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
355
    int v = qcoeff[0];
356
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
357
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
358
    token_cache[0] = vp9_pt_energy_class[prev_t];
359
    ++token_costs;
360
361
362
363

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
364
      int t;
365

Dmitry Kovalev's avatar
Dmitry Kovalev committed
366
      v = qcoeff[rc];
367
      t = vp9_dct_value_tokens_ptr[v].token;
368
369
370
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
371
        pt = get_coef_context(nb, token_cache, c);
372
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
373
        token_cache[rc] = vp9_pt_energy_class[t];
374
      }
375
      prev_t = t;
376
      if (!--band_left) {
377
378
        band_left = *band_count++;
        ++token_costs;
379
      }
380
    }
381
382

    // eob token
383
    if (band_left) {
384
385
386
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
387
        pt = get_coef_context(nb, token_cache, c);
388
389
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
390
    }
391
392
  }

393
  // is eob first coefficient;
394
  *A = *L = (c > 0);
395

396
397
  return cost;
}
398
399
400
401
402

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
403
404
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
405
#endif  // CONFIG_VP9_HIGHBITDEPTH
406
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
407
408
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
409
410
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
411
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
412
  int shift = tx_size == TX_32X32 ? 0 : 2;
413
414
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
415
416
417
418
#if CONFIG_VP9_HIGHBITDEPTH
  args->dist = vp9_high_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                    &this_sse, bd) >> shift;
#else
419
420
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
421
#endif  // CONFIG_VP9_HIGHBITDEPTH
422
  args->sse  = this_sse >> shift;
423

hkuang's avatar
hkuang committed
424
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
425
426
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
427
                    (1 << ss_txfrm_size)) >> (shift + 2);
428
429
430
431
432
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
433
434
    args->dist += (p >> 4);
    args->sse  += p;
435
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
436
437
}

438
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
439
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
440
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
441
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
442

443
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
444
                           args->t_left + y_idx, tx_size,
445
446
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
447
448
}

449
450
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
Deb Mukherjee's avatar
Deb Mukherjee committed
451
452
453
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
454
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
455
  int64_t rd1, rd2, rd;
Deb Mukherjee's avatar
Deb Mukherjee committed
456

457
458
459
  if (args->skip)
    return;

460
  if (!is_inter_block(mbmi)) {
461
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
462
463
464
465
466
467
468
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
469
    dist_block(plane, block, tx_size, args);
470
#endif  // CONFIG_VP9_HIGHBITDEPTH
471
472
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
473
474
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
475
476
477
478
479
480
481
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dist_block(plane, block, tx_size, args, xd->bd);
      } else {
        dist_block(plane, block, tx_size, args, 8);
      }
#else
482
      dist_block(plane, block, tx_size, args);
483
#endif  // CONFIG_VP9_HIGHBITDEPTH
484
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
485
      // compute DC coefficient
486
487
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
488
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
489
      args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
490
      args->dist = args->sse;
491
492
493
      if (x->plane[plane].eobs[block]) {
        int64_t dc_correct = coeff[0] * coeff[0] -
            (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0]);
494
495
496
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
497
498
499
500
501
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        args->dist = args->sse - dc_correct;
      }
502
503
504
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
505
      args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
506
507
      args->dist = args->sse;
    }
508
509
510
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
511
512
513
514
515
516
517
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
518
    dist_block(plane, block, tx_size, args);
519
#endif  // CONFIG_VP9_HIGHBITDEPTH
520
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
521

522
  rate_block(plane, block, plane_bsize, tx_size, args);
523
524
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
525
526

  // TODO(jingning): temporarily enabled only for luma component
527
  rd = MIN(rd1, rd2);
Yaowu Xu's avatar
Yaowu Xu committed
528
  if (plane == 0)
529
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
Yaowu Xu's avatar
Yaowu Xu committed
530
                                    (rd1 > rd2 && !xd->lossless);
531

532
533
534
  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
535
536
537
538
539
540
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->skip = 1;
    return;
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
541
542
}

543
static void txfm_rd_in_plane(MACROBLOCK *x,
544
545
546
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
547
548
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
549
  MACROBLOCKD *const xd = &x->e_mbd;
550
  const struct macroblockd_plane *const pd = &xd->plane[plane];
551
552
  struct rdcost_block_args args;
  vp9_zero(args);
553
554
  args.x = x;
  args.best_rd = ref_best_rd;
555
  args.use_fast_coef_costing = use_fast_coef_casting;
556

557
  if (plane == 0)
hkuang's avatar
hkuang committed
558
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
559

560
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
561

562
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
563

564
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
565
566
                                         block_rd_txfm, &args);
  if (args.skip) {
567
568
569
570
571
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
572
573
574
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
575
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
576
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
577
578
}

579
580
581
582
583
// Sets the block's transform size to the smaller of the largest size allowed
// for block size bs and the largest size allowed by the frame's tx_mode,
// then evaluates luma at that size through txfm_rd_in_plane().
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cpi->common.tx_mode];

  mbmi->tx_size = MIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* plane */, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

597
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
598
599
600
601
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
602
                                   int64_t tx_cache[TX_MODES],
603
                                   int64_t ref_best_rd,
604
                                   BLOCK_SIZE bs) {
605
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
606
607
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
608
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
609
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
610
611
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
612
613
614
615
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
616
  int n, m;
617
  int s0, s1;
618
619
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
620
  TX_SIZE best_tx = max_tx_size;
621

622
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
623
624
625
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
626

627
  for (n = max_tx_size; n >= 0;  n--) {
628
629
630
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
631
632
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
633
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
634
635
636
637
638
639
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
640
641
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
642
    } else if (s[n]) {
643
644
645
646
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
647
    }
648

649
650
651
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
652
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
653
654
655
        s[n] == 1))
      break;

656
657
658
659
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
660
  }
661
662
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
663
664


665
666
667
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
668
  *psse       = sse[mbmi->tx_size];
669

670
671
672
673
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
674

675
676
677
678
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
679
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
680
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
681
  } else {
682
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
683
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
684
}
685

686
687
688
689
690
// Luma rate/distortion for a block. Uses the largest allowed transform size
// (and zeroes txfm_cache) when the speed feature asks for USE_LARGESTALL or
// the block is lossless; otherwise searches the transform size by RD cost.
// psse may be null, in which case the SSE is written to a local and dropped.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t unused_sse;
  int64_t *sse_out = psse;

  if (!sse_out)
    sse_out = &unused_sse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method != USE_LARGESTALL && !xd->lossless) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out,
                           txfm_cache, ref_best_rd, bs);
    return;
  }

  vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
  choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out, ref_best_rd,
                         bs);
}

707
708
// Returns 1 when the oblique intra mode `mode` should be skipped because the
// best intra mode found so far is neither of the two modes it is paired with
// below; returns 0 for every other mode.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

728
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
729
                                     PREDICTION_MODE *best_mode,
730
                                     const int *bmode_costs,
731
732
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
733
                                     int64_t *bestdistortion,
734
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
735
  PREDICTION_MODE mode;
736
  MACROBLOCKD *const xd = &x->e_mbd;
737
  int64_t best_rd = rd_thresh;
738

739
740
741
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
742
  const int dst_stride = pd->dst.stride;
743
744
745
746
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
747
748
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
749

Jim Bankoski's avatar
Jim Bankoski committed
750
751
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
752
  int idx, idy;
753
  uint8_t best_dst[8 * 8];
754
755
756
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
757

Jingning Han's avatar
Jingning Han committed
758
  assert(ib < 4);
759

760
761
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
762
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
763

764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
          vp9_high_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                  dst, dst_stride, xd->bd);
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
            vp9_high_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_high_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                 dst, dst_stride,
                                 p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
            vp9_high_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            distortion += vp9_high_block_error(coeff,
                                               BLOCK_OFFSET(pd->dqcoeff, block),
                                               16, &unused, xd->bd) >> 2;
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_high_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                dst, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        vpx_memcpy(a, tempa, sizeof(tempa));
        vpx_memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          vpx_memcpy(best_dst16 + idy * 8,
                     CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                     num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 best_dst16 + idy * 8,
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

866
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
867
    int64_t this_rd;
868
    int ratey = 0;
869
870
    int64_t distortion = 0;
    int rate = bmode_costs[mode];
871

872
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
873
874
      continue;

875
876
    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
877
878
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
879
880
          continue;
    }
881

882
883
884
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

Jim Bankoski's avatar
Jim Bankoski committed
885
886
    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
887
        const int block = ib + idy * 2 + idx;
888
889
890
891
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
892
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
hkuang's avatar
hkuang committed
893
        xd->mi[0].src_mi->bmi[block].as_mode = mode;
894
        vp9_predict_intra_block(xd, block, 1,
895
                                TX_4X4, mode,
896
897
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
898
                                dst, dst_stride, idx, idy, 0);
899
900
901
902
903
904
905
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
906
907
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
908
909
910
911
912
913
914
915
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
916
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
917
918
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
919
920
                             so->scan, so->neighbors,
                             cpi->sf.use_fast_coef_costing);
921
922
923
924
925
926
927
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
928
929
      }
    }
Jingning Han's avatar
Jingning Han committed
930

931
932
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
933

934
935
936
937
938
939
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
940
941
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
942
943
944
      for (idy = 0; idy <