vp9_rdopt.c 153 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
 */

#include <assert.h>
12
#include <math.h>
13

14
15
16
17
18
19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21
22
23
24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25
26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27
28
29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38
39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
43
44
45
46
47
48
// Bit masks built from reference-frame enum values (bit N <=> frame N).
// Each *_MODE_MASK sets the bits of the three frame types other than the one
// named by the macro.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// Mask with the ALTREF_FRAME bit and bit 0 set.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
49

Paul Wilkins's avatar
Paul Wilkins committed
50
51
// NOTE(review): the use of this constant is outside this part of the file;
// presumably the lowest mode index at which early termination may trigger -
// confirm against the mode search loop.
#define MIN_EARLY_TERM_INDEX    3

52
typedef struct {
53
  PREDICTION_MODE mode;
54
55
56
57
58
59
60
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference search order: a pair of reference frames
// (see vp9_ref_order for the combinations used).
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
61
62
63
64
65
66
67
68
69
70
71
72
73
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
74
  int use_fast_coef_costing;
75
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
76
77
};

78
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
79
80
81
82
83
84
85
86
87
88
89
90
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
91
92
93
94
95
96
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
118
119
};

120
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
121
122
123
124
125
126
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
127
128
};

129
130
// Converts a raster-order 4x4 block index within a plane block of size
// plane_bsize into an element offset for a buffer with the given stride.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int cols_log2 = b_width_log2_lookup[plane_bsize];
  const int col_mask = (1 << cols_log2) - 1;
  // Row/column of the 4x4 block, converted to units of 4 samples.
  const int row_pels = 4 * (raster_block >> cols_log2);
  const int col_pels = 4 * (raster_block & col_mask);
  return row_pels * stride + col_pels;
}
// Returns a pointer into a densely packed int16_t buffer (row stride equal to
// the block width in samples) at the position of the given raster 4x4 block.
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int width_in_pels = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         width_in_pels);
  return base + offset;
}

142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
// For planes [min_plane, max_plane): points the macroblock's coefficient
// buffers (coeff, qcoeff, dqcoeff, eobs) at the context's slot-m buffers as
// they were on entry, then exchanges the context's slot-m and slot-n entries.
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Each group: take slot m, move slot n into m, store the taken pointer
    // into n. The plane pointer doubles as the swap temporary.
    mb_plane->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = mb_plane->coeff;

    mb_plane->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = mb_plane->qcoeff;

    mbd_plane->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = mbd_plane->dqcoeff;

    mb_plane->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = mb_plane->eobs;
  }
}

167
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
168
                            MACROBLOCK *x, MACROBLOCKD *xd,
169
170
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
Deb Mukherjee's avatar
Deb Mukherjee committed
171
172
173
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
174
175
176
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
177
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
178
  unsigned int sse;
179
  unsigned int var = 0;
180
  unsigned int sum_sse = 0;
181
182
  int64_t total_sse = 0;
  int skip_flag = 1;
183
  const int shift = 6;
184
185
186
187
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
188
189
190
191

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
192
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
193
194
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
195
196
197
198
199
200
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
201
202
203
204
205
206
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

207
    sum_sse = 0;
208
209
210
211
212

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
213
        int block_idx = (idy << 1) + idx;
214
        int low_err_skip = 0;
215
216
217
218
219
220

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

221
        x->skip_txfm[(i << 2) + block_idx] = 0;
222
        if (!x->select_tx_size) {
223
          // Check if all ac coefficients can be quantized to zero.
224
          if (var < ac_thr || var == 0) {
225
            x->skip_txfm[(i << 2) + block_idx] = 2;
226
227

            // Check if dc coefficient can be quantized to zero.
228
            if (sse - var < dc_thr || sse == var) {
229
              x->skip_txfm[(i << 2) + block_idx] = 1;
230
231
232
233

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
234
          }
235
        }
236

237
238
239
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

240
241
242
243
        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
244

245
246
    total_sse += sum_sse;

247
    // Fast approximate the modelling function.
248
    if (cpi->oxcf.speed > 4) {
249
      int64_t rate;
250
      const int64_t square_error = sum_sse;
251
      int quantizer = (pd->dequant[1] >> 3);
252
253
254
255
256
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
257

258
259
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
260
261
262
263
264
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
265
    } else {
266
267
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Deb Mukherjee's avatar
Deb Mukherjee committed
268
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
269
270
271
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
Deb Mukherjee's avatar
Deb Mukherjee committed
272
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
273
274
275
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
276
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
277
                                   pd->dequant[1] >> 3, &rate, &dist);
278
#endif  // CONFIG_VP9_HIGHBITDEPTH
279
      rate_sum += rate;
280
      dist_sum += dist;
281
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
282
283
  }

284
285
  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
286
287
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
288
289
}

290
// Computes the sum of squared differences between coeff[] and dqcoeff[] over
// block_size coefficients.
//
// Returns the squared-error sum and stores the sum of squared coeff[] values
// in *ssz. All products are formed in 64 bits so that large coefficient
// magnitudes cannot overflow int, matching the high-bitdepth variant.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    const int64_t c = coeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

305
306

#if CONFIG_VP9_HIGHBITDEPTH
307
308
309
310
// High-bitdepth block error: sum of squared (coeff - dqcoeff) over block_size
// coefficients, rounded and shifted right by 2 * (bd - 8). The same scaling
// is applied to the sum of squared coeff[] values stored in *ssz.
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  int rounding = 0;
  int64_t err_acc = 0;
  int64_t sq_acc = 0;
  int k;

  // Half of the divisor, for round-to-nearest; zero when no shift applies.
  if (shift > 0)
    rounding = 1 << (shift - 1);

  for (k = 0; k < block_size; ++k) {
    const int64_t delta = coeff[k] - dqcoeff[k];
    const int64_t value = coeff[k];
    err_acc += delta * delta;
    sq_acc += value * value;
  }
  assert(err_acc >= 0 && sq_acc >= 0);

  *ssz = (sq_acc + rounding) >> shift;
  return (err_acc + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

330
331
332
333
334
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
335
static const int16_t band_counts[TX_SIZES][8] = {
336
337
338
339
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
340
};
341
static INLINE int cost_coeffs(MACROBLOCK *x,
342
                              int plane, int block,
343
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
344
                              TX_SIZE tx_size,
345
346
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
347
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
348
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
349
350
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
351
  const PLANE_TYPE type = pd->plane_type;
352
  const int16_t *band_count = &band_counts[tx_size][1];
353
  const int eob = p->eobs[block];
354
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
355
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
356
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
357
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
358
  int pt = combine_entropy_contexts(*A, *L);
359
  int c, cost;
360
  // Check for consistency of tx_size with mode info
361
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
362
                              : get_uv_tx_size(mbmi, pd) == tx_size);
363

364
365
  if (eob == 0) {
    // single eob token
366
    cost = token_costs[0][0][pt][EOB_TOKEN];
367
    c = 0;
368
  } else {
369
    int band_left = *band_count++;
370
371

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
372
    int v = qcoeff[0];
373
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
374
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
375
    token_cache[0] = vp9_pt_energy_class[prev_t];
376
    ++token_costs;
377
378
379
380

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
381
      int t;
382

Dmitry Kovalev's avatar
Dmitry Kovalev committed
383
      v = qcoeff[rc];
384
      t = vp9_dct_value_tokens_ptr[v].token;
385
386
387
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
388
        pt = get_coef_context(nb, token_cache, c);
389
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
390
        token_cache[rc] = vp9_pt_energy_class[t];
391
      }
392
      prev_t = t;
393
      if (!--band_left) {
394
395
        band_left = *band_count++;
        ++token_costs;
396
      }
397
    }
398
399

    // eob token
400
    if (band_left) {
401
402
403
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
404
        pt = get_coef_context(nb, token_cache, c);
405
406
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
407
    }
408
409
  }

410
  // is eob first coefficient;
411
  *A = *L = (c > 0);
412

413
414
  return cost;
}
415
416
417
418
419

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
420
421
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
422
#endif  // CONFIG_VP9_HIGHBITDEPTH
423
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
424
425
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
426
427
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
428
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
429
  int shift = tx_size == TX_32X32 ? 0 : 2;
430
431
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
432
#if CONFIG_VP9_HIGHBITDEPTH
433
434
  args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
435
#else
436
437
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
438
#endif  // CONFIG_VP9_HIGHBITDEPTH
439
  args->sse  = this_sse >> shift;
440

hkuang's avatar
hkuang committed
441
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
442
443
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
444
                    (1 << ss_txfrm_size)) >> (shift + 2);
445
446
447
448
449
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
450
451
    args->dist += (p >> 4);
    args->sse  += p;
452
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
453
454
}

455
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
456
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
457
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
458
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
459

460
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
461
                           args->t_left + y_idx, tx_size,
462
463
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
464
465
}

466
467
// Calls dist_block() with the bit depth that applies to the current frame
// buffer: xd->bd for high-bitdepth buffers, 8 otherwise.
static void dist_block_for_depth(int plane, int block, TX_SIZE tx_size,
                                 struct rdcost_block_args *args) {
#if CONFIG_VP9_HIGHBITDEPTH
  const MACROBLOCKD *const xd = &args->x->e_mbd;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    dist_block(plane, block, tx_size, args, xd->bd);
  } else {
    dist_block(plane, block, tx_size, args, 8);
  }
#else
  dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
}

// Per-transform-block callback: computes rate, distortion and SSE of one
// transform block and accumulates them into the rdcost_block_args passed via
// arg. Does nothing once args->skip is set; sets args->skip when the
// accumulated RD cost exceeds args->best_rd.
//
// Intra blocks are predicted and encoded first. For inter blocks at the
// plane's largest transform size, x->skip_txfm[] selects between a full
// transform (0), a DC-only transform (2) and no transform at all (other).
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi)) {
    struct encode_b_args intra_arg = {x, NULL, &mbmi->skip};
    vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &intra_arg);
    dist_block_for_depth(plane, block, tx_size, args);
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Index of this block's entry in x->skip_txfm[] / x->bsse[].
    const int unit_idx = (plane << 2) + (block >> (tx_size << 1));

    if (x->skip_txfm[unit_idx] == 0) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      dist_block_for_depth(plane, block, tx_size, args);
    } else if (x->skip_txfm[unit_idx] == 2) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      args->sse  = x->bsse[unit_idx] << 4;
      args->dist = args->sse;
      if (x->plane[plane].eobs[block]) {
        // Energy removed by coding the DC coefficient. Computed in 64 bits
        // so the squares cannot overflow int for large coefficients.
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd = (int64_t)coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd * resd;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        args->dist = MAX(0, args->sse - dc_correct);
      }
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      args->sse  = x->bsse[unit_idx] << 4;
      args->dist = args->sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
    dist_block_for_depth(plane, block, tx_size, args);
  }

  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of zeroing the block.
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Over budget: tell subsequent invocations to bail out early.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

561
static void txfm_rd_in_plane(MACROBLOCK *x,
562
563
564
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
565
566
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
567
  MACROBLOCKD *const xd = &x->e_mbd;
568
  const struct macroblockd_plane *const pd = &xd->plane[plane];
569
570
  struct rdcost_block_args args;
  vp9_zero(args);
571
572
  args.x = x;
  args.best_rd = ref_best_rd;
573
  args.use_fast_coef_costing = use_fast_coef_casting;
574

575
  if (plane == 0)
hkuang's avatar
hkuang committed
576
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
577

578
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
579

580
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
581

582
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
583
584
                                         block_rd_txfm, &args);
  if (args.skip) {
585
586
587
588
589
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
590
591
592
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
593
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
594
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
595
596
}

597
598
599
600
601
// Selects the largest transform size allowed by both the block size and the
// frame's tx_mode, stores it in the mode info, and evaluates the luma plane
// with it.
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cpi->common.tx_mode];

  mbmi->tx_size = MIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* plane */, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

615
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
616
617
618
619
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
620
                                   int64_t tx_cache[TX_MODES],
621
                                   int64_t ref_best_rd,
622
                                   BLOCK_SIZE bs) {
623
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
624
625
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
626
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
627
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
628
629
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
630
631
632
633
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
634
  int n, m;
635
  int s0, s1;
636
637
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
638
  TX_SIZE best_tx = max_tx_size;
639

640
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
641
642
643
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
644

645
  for (n = max_tx_size; n >= 0;  n--) {
646
647
648
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
649
650
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
651
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
652
653
654
655
656
657
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
658
659
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
660
    } else if (s[n]) {
661
662
663
664
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
665
    }
666

667
668
669
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
670
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
671
672
673
        s[n] == 1))
      break;

674
675
676
677
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
678
  }
679
680
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
681
682


683
684
685
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
686
  *psse       = sse[mbmi->tx_size];
687

688
689
690
691
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
692

693
694
695
696
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
697
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
698
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
699
  } else {
700
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
701
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
702
}
703

704
705
706
707
708
// Luma RD evaluation entry point for a block of size bs. Uses the largest
// permitted transform size when the speed feature requests it or the frame is
// lossless (txfm_cache[] is then zeroed); otherwise runs the full transform
// size search. psse may be NULL if the caller does not need the SSE.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t unused_sse;
  int64_t *ret_sse;
  int largest_only;

  // Route the SSE into a scratch variable when the caller passed NULL.
  if (psse)
    ret_sse = psse;
  else
    ret_sse = &unused_sse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  largest_only =
      cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless;

  if (!largest_only) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
                           txfm_cache, ref_best_rd, bs);
  } else {
    vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
                           ref_best_rd, bs);
  }
}

725
726
// Returns 1 if the directional intra mode `mode` should be skipped given the
// best intra mode found so far, 0 otherwise. A directional mode is skipped
// unless best_intra_mode is one of the two modes paired with it below; all
// other modes are never skipped.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

746
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
747
                                     PREDICTION_MODE *best_mode,
748
                                     const int *bmode_costs,
749
750
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
751
                                     int64_t *bestdistortion,
752
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
753
  PREDICTION_MODE mode;
754
  MACROBLOCKD *const xd = &x->e_mbd;
755
  int64_t best_rd = rd_thresh;
756

757
758
759
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
760
  const int dst_stride = pd->dst.stride;
761
762
763
764
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
765
766
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
767

Jim Bankoski's avatar
Jim Bankoski committed
768
769
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
770
  int idx, idy;
771
  uint8_t best_dst[8 * 8];
772
773
774
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
775

Jingning Han's avatar
Jingning Han committed
776
  assert(ib < 4);
777

778
779
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
780
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
781

782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
817
818
          vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                    dst, dst_stride, xd->bd);
819
820
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
821
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
822
823
824
825
826
827
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
828
829
830
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                   dst, dst_stride,
                                   p->eobs[block], xd->bd);
831
832
833
834
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
835
            vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
836
837
838
839
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
840
841
842
            distortion += vp9_highbd_block_error(
                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                16, &unused, xd->bd) >> 2;
843
844
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
845
846
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst, dst_stride, p->eobs[block], xd->bd);
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        vpx_memcpy(a, tempa, sizeof(tempa));
        vpx_memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          vpx_memcpy(best_dst16 + idy * 8,
                     CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                     num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 best_dst16 + idy * 8,
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

884
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
885
    int64_t this_rd;
886
    int ratey = 0;
887
888
    int64_t distortion = 0;
    int rate = bmode_costs[mode];
889

890
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
891
892
      continue;

893
894
    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
895
896
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
897
898
          continue;
    }
899

900
901
902
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

Jim Bankoski's avatar
Jim Bankoski committed
903
904
    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
905
        const int block = ib + idy * 2 + idx;
906
907
908
909
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
910
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
hkuang's avatar
hkuang committed
911
        xd->mi[0].src_mi->bmi[block].as_mode = mode;
912
        vp9_predict_intra_block(xd, block, 1,
913
                                TX_4X4, mode,
914
915
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
916
                                dst, dst_stride, idx, idy, 0);
917
918
919
920
921
922
923
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
924
925
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
926
927
928
929