vp9_rdopt.c 153 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
12
#include <math.h>
13

14
15
16
17
18
19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21
22
23
24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25
26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27
28
29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38
39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
// RD threshold tuning constants (their users are outside this section).
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1

// Reference-frame bit masks, one bit per MV_REFERENCE_FRAME value
// (bit index == enum value). Each *_MODE_MASK sets the bits of every
// reference frame other than the one the mask is named after.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// ALTREF bit plus bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME -- confirm.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)

#define MIN_EARLY_TERM_INDEX    3

55
typedef struct {
56
  PREDICTION_MODE mode;
57
58
59
60
61
62
63
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
64
65
66
67
68
69
70
71
72
73
74
75
76
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
77
  int use_fast_coef_costing;
78
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
79
80
};

81
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
82
83
84
85
86
87
88
89
90
91
92
93
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
94
95
96
97
98
99
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
121
122
};

123
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
124
125
126
127
128
129
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
130
131
};

132
133
// Converts a raster-order 4x4 block index within a plane block of size
// plane_bsize into a sample offset for a buffer with the given stride.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int col_mask = (1 << width_log2) - 1;
  // Row/column of the 4x4 block, scaled to sample units.
  const int row_px = 4 * (raster_block >> width_log2);
  const int col_px = 4 * (raster_block & col_mask);

  return row_px * stride + col_px;
}
// Same as raster_block_offset(), but for a tightly packed int16_t buffer
// whose stride equals the block width in samples; returns a pointer into
// base rather than an offset.
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int packed_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         packed_stride);

  return base + offset;
}

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// For every plane in [min_plane, max_plane), exchanges slots m and n of the
// coefficient buffer pointers kept in ctx (coeff, qcoeff, dqcoeff, eobs).
// The plane structures of x are used as the temporaries for the swap, so on
// return they point at the buffers that were in slot m before the call
// (and are in slot n afterwards).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];

    // Transform coefficients.
    p->coeff                    = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m]   = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n]   = p->coeff;

    // Quantized coefficients.
    p->qcoeff                   = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m]  = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n]  = p->qcoeff;

    // Dequantized coefficients.
    pd->dqcoeff                 = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = pd->dqcoeff;

    // End-of-block positions.
    p->eobs                     = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m]    = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n]    = p->eobs;
  }
}

170
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
171
                            MACROBLOCK *x, MACROBLOCKD *xd,
172
173
                            int *out_rate_sum, int64_t *out_dist_sum,
                            int *skip_txfm_sb, int64_t *skip_sse_sb) {
Deb Mukherjee's avatar
Deb Mukherjee committed
174
175
176
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
177
178
179
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
180
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
181
  unsigned int sse;
182
  unsigned int var = 0;
183
  unsigned int sum_sse = 0;
184
185
  int64_t total_sse = 0;
  int skip_flag = 1;
186
  const int shift = 6;
187
188
189
190
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
191
192
193
194

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
195
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
196
197
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
198
199
200
201
202
203
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
204
205
206
207
208
209
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

210
    sum_sse = 0;
211
212
213
214
215

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
216
        int block_idx = (idy << 1) + idx;
217
        int low_err_skip = 0;
218
219
220
221
222
223

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

224
        x->skip_txfm[(i << 2) + block_idx] = 0;
225
        if (!x->select_tx_size) {
226
          // Check if all ac coefficients can be quantized to zero.
227
          if (var < ac_thr || var == 0) {
228
            x->skip_txfm[(i << 2) + block_idx] = 2;
229
230

            // Check if dc coefficient can be quantized to zero.
231
            if (sse - var < dc_thr || sse == var) {
232
              x->skip_txfm[(i << 2) + block_idx] = 1;
233
234
235
236

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
237
          }
238
        }
239

240
241
242
        if (skip_flag && !low_err_skip)
          skip_flag = 0;

243
244
245
246
        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
247

248
249
    total_sse += sum_sse;

250
    // Fast approximate the modelling function.
251
    if (cpi->oxcf.speed > 4) {
252
      int64_t rate;
253
      const int64_t square_error = sum_sse;
254
      int quantizer = (pd->dequant[1] >> 3);
255
256
257
258
259
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
260

261
262
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
263
264
265
266
267
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
268
    } else {
269
270
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Deb Mukherjee's avatar
Deb Mukherjee committed
271
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
272
273
274
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
Deb Mukherjee's avatar
Deb Mukherjee committed
275
        vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
276
277
278
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
279
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
280
                                   pd->dequant[1] >> 3, &rate, &dist);
281
#endif  // CONFIG_VP9_HIGHBITDEPTH
282
      rate_sum += rate;
283
      dist_sum += dist;
284
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
285
286
  }

287
288
  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
289
290
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
291
292
}

293
// Computes the sum of squared differences between coeff[] and dqcoeff[]
// over block_size entries.
//
// Returns the squared error; *ssz receives the sum of squares of coeff[]
// itself.
//
// All products are formed in 64 bits: the subtraction and multiplications
// would otherwise be carried out in int and could overflow (undefined
// behaviour) for large coefficient magnitudes before being accumulated.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

308
309

#if CONFIG_VP9_HIGHBITDEPTH
// High bit depth variant of vp9_block_error_c(): accumulates the squared
// error and squared coefficient energy in 64 bits, then scales both back by
// 2 * (bd - 8) bits with rounding.
//
// Returns the scaled squared error; *ssz receives the scaled sum of squares
// of coeff[].
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff,
                                 intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t sum_sq_diff = 0;
  int64_t sum_sq_coeff = 0;
  int k;

  for (k = 0; k < block_size; ++k) {
    const int64_t delta = coeff[k] - dqcoeff[k];
    sum_sq_diff += delta * delta;
    sum_sq_coeff += (int64_t)coeff[k] * (int64_t)coeff[k];
  }
  assert(sum_sq_diff >= 0 && sum_sq_coeff >= 0);

  *ssz = (sum_sq_coeff + rounding) >> shift;
  return (sum_sq_diff + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

333
334
335
336
337
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
338
static const int16_t band_counts[TX_SIZES][8] = {
339
340
341
342
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
343
};
344
static INLINE int cost_coeffs(MACROBLOCK *x,
345
                              int plane, int block,
346
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
347
                              TX_SIZE tx_size,
348
349
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
350
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
351
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
352
353
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
354
  const PLANE_TYPE type = pd->plane_type;
355
  const int16_t *band_count = &band_counts[tx_size][1];
356
  const int eob = p->eobs[block];
357
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
358
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
359
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
360
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
361
  int pt = combine_entropy_contexts(*A, *L);
362
  int c, cost;
363
  // Check for consistency of tx_size with mode info
364
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
365
                              : get_uv_tx_size(mbmi, pd) == tx_size);
366

367
368
  if (eob == 0) {
    // single eob token
369
    cost = token_costs[0][0][pt][EOB_TOKEN];
370
    c = 0;
371
  } else {
372
    int band_left = *band_count++;
373
374

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
375
    int v = qcoeff[0];
376
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
377
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
378
    token_cache[0] = vp9_pt_energy_class[prev_t];
379
    ++token_costs;
380
381
382
383

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
384
      int t;
385

Dmitry Kovalev's avatar
Dmitry Kovalev committed
386
      v = qcoeff[rc];
387
      t = vp9_dct_value_tokens_ptr[v].token;
388
389
390
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
391
        pt = get_coef_context(nb, token_cache, c);
392
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
393
        token_cache[rc] = vp9_pt_energy_class[t];
394
      }
395
      prev_t = t;
396
      if (!--band_left) {
397
398
        band_left = *band_count++;
        ++token_costs;
399
      }
400
    }
401
402

    // eob token
403
    if (band_left) {
404
405
406
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
407
        pt = get_coef_context(nb, token_cache, c);
408
409
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
410
    }
411
412
  }

413
  // is eob first coefficient;
414
  *A = *L = (c > 0);
415

416
417
  return cost;
}
418
419
420
421
422

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
423
424
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
425
#endif  // CONFIG_VP9_HIGHBITDEPTH
426
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
427
428
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
429
430
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
431
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
432
  int shift = tx_size == TX_32X32 ? 0 : 2;
433
434
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
435
#if CONFIG_VP9_HIGHBITDEPTH
436
437
  args->dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                      &this_sse, bd) >> shift;
438
#else
439
440
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
441
#endif  // CONFIG_VP9_HIGHBITDEPTH
442
  args->sse  = this_sse >> shift;
443

hkuang's avatar
hkuang committed
444
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
445
446
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
447
                    (1 << ss_txfrm_size)) >> (shift + 2);
448
449
450
451
452
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
453
454
    args->dist += (p >> 4);
    args->sse  += p;
455
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
456
457
}

458
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
459
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
460
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
461
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
462

463
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
464
                           args->t_left + y_idx, tx_size,
465
466
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
467
468
}

469
470
// Per-transform-block callback (arg is a struct rdcost_block_args *): it
// produces rate, distortion and SSE for the block and adds them to the
// running totals in args.
//
//   - Intra blocks are fully encoded through vp9_encode_block_intra().
//   - Inter blocks at the largest transform size use x->skip_txfm[] to do
//     a full transform (0), a DC-only transform (2) or no transform at
//     all (any other value).
//   - All other inter blocks get a full transform and quantization.
//
// Does nothing once args->skip is set; sets args->skip as soon as the
// accumulated RD cost exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi)) {
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
    dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Index of this block in the per-plane bsse[] / skip_txfm[] arrays.
    const int unit_idx = (plane << 2) + (block >> (tx_size << 1));

    if (x->skip_txfm[unit_idx] == 0) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dist_block(plane, block, tx_size, args, xd->bd);
      } else {
        dist_block(plane, block, tx_size, args, 8);
      }
#else
      dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    } else if (x->skip_txfm[unit_idx] == 2) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      args->sse  = x->bsse[unit_idx] << 4;
      args->dist = args->sse;
      if (x->plane[plane].eobs[block]) {
        // Energy removed by coding the DC term. Computed in 64 bits so the
        // squares cannot overflow int.
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd = (int64_t)coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd * resd;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32)
          dc_correct >>= 2;

        args->dist = MAX(0, args->sse - dc_correct);
      }
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      args->sse  = x->bsse[unit_idx] << 4;
      args->dist = args->sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dist_block(plane, block, tx_size, args, xd->bd);
    } else {
      dist_block(plane, block, tx_size, args, 8);
    }
#else
    dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of dropping them.
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  // Stop evaluating further blocks once the budget is exceeded.
  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

563
static void txfm_rd_in_plane(MACROBLOCK *x,
564
565
566
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
567
568
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
569
  MACROBLOCKD *const xd = &x->e_mbd;
570
  const struct macroblockd_plane *const pd = &xd->plane[plane];
571
572
  struct rdcost_block_args args;
  vp9_zero(args);
573
574
  args.x = x;
  args.best_rd = ref_best_rd;
575
  args.use_fast_coef_costing = use_fast_coef_casting;
576

577
  if (plane == 0)
hkuang's avatar
hkuang committed
578
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
579

580
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
581

582
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
583

584
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
585
586
                                         block_rd_txfm, &args);
  if (args.skip) {
587
588
589
590
591
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
592
593
594
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
595
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
596
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
597
598
}

599
600
601
602
603
// Evaluates luma RD using the largest transform size permitted by both the
// block size and the frame tx_mode, with no search over smaller sizes. The
// chosen size is stored in the mode info.
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cm->tx_mode];

  mbmi->tx_size = MIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* luma plane */, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

617
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
618
619
620
621
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
622
                                   int64_t tx_cache[TX_MODES],
623
                                   int64_t ref_best_rd,
624
                                   BLOCK_SIZE bs) {
625
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
626
627
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
628
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
629
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
630
631
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
632
633
634
635
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
636
  int n, m;
637
  int s0, s1;
638
639
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
640
  TX_SIZE best_tx = max_tx_size;
641

642
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
643
644
645
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
646

647
  for (n = max_tx_size; n >= 0;  n--) {
648
649
650
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
651
652
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
653
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
654
655
656
657
658
659
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
660
661
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
662
    } else if (s[n]) {
663
664
665
666
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
667
    }
668

669
670
671
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
672
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
673
674
675
        s[n] == 1))
      break;

676
677
678
679
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
680
  }
681
682
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
683
684


685
686
687
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
688
  *psse       = sse[mbmi->tx_size];
689

690
691
692
693
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
694

695
696
697
698
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
699
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
700
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
701
  } else {
702
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
703
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
704
}
705

706
707
708
709
710
// Luma rate-distortion entry point for a block of size bs.
//
// Uses the largest allowed transform size directly when the speed feature
// requests it or when coding is lossless (txfm_cache is then zeroed);
// otherwise performs an RD search over transform sizes, which also fills
// txfm_cache. psse may be NULL if the caller does not need the SSE.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t local_sse;
  // The helpers below always write an SSE; give them somewhere to put it.
  int64_t *sse_out = psse ? psse : &local_sse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  if (cpi->sf.tx_size_search_method != USE_LARGESTALL && !xd->lossless) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out,
                           txfm_cache, ref_best_rd, bs);
    return;
  }

  vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
  choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out, ref_best_rd,
                         bs);
}

727
728
// Returns 1 if the directional intra mode `mode` can be skipped given the
// best intra mode found so far, 0 otherwise. Each of the four modes handled
// here is skipped unless the current best is one of two specific modes;
// every other mode is never skipped.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

748
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
749
                                     PREDICTION_MODE *best_mode,
750
                                     const int *bmode_costs,
751
752
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
753
                                     int64_t *bestdistortion,
754
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
755
  PREDICTION_MODE mode;
756
  MACROBLOCKD *const xd = &x->e_mbd;
757
  int64_t best_rd = rd_thresh;
758

759
760
761
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
762
  const int dst_stride = pd->dst.stride;
763
764
765
766
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
767
768
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
769

Jim Bankoski's avatar
Jim Bankoski committed
770
771
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
772
  int idx, idy;
773
  uint8_t best_dst[8 * 8];
774
775
776
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
777

Jingning Han's avatar
Jingning Han committed
778
  assert(ib < 4);
779

780
781
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
782
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
783

784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
819
820
          vp9_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                    dst, dst_stride, xd->bd);
821
822
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
823
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
824
825
826
827
828
829
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
830
831
832
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                   dst, dst_stride,
                                   p->eobs[block], xd->bd);
833
834
835
836
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
837
            vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
838
839
840
841
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
842
843
844
            distortion += vp9_highbd_block_error(
                coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                16, &unused, xd->bd) >> 2;
845
846
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
847
848
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst, dst_stride, p->eobs[block], xd->bd);
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        vpx_memcpy(a, tempa, sizeof(tempa));
        vpx_memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          vpx_memcpy(best_dst16 + idy * 8,
                     CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                     num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 best_dst16 + idy * 8,
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

886
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
887
    int64_t this_rd;
888
    int ratey = 0;
889
890
    int64_t distortion = 0;
    int rate = bmode_costs[mode];
891

892
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
893
894
      continue;

895
896
    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
897
898
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
899
900
          continue;
    }
901

902
903
904
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

Jim Bankoski's avatar
Jim Bankoski committed
905
906
    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
907
        const int block = ib + idy * 2 + idx;
908
909
910
911
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
912
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
hkuang's avatar
hkuang committed
913
        xd->mi[0].src_mi->bmi[block].as_mode = mode;
914
        vp9_predict_intra_block(xd, block, 1,
915
                                TX_4X4, mode,
916
917
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
918
                                dst, dst_stride, idx, idy, 0);
919
920
921
922
923
924
925
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,