vp9_rdopt.c 154 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
11
 */

#include <assert.h>
12
#include <math.h>
13

14
15
16
17
18
19
#include "./vp9_rtcd.h"

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
20
#include "vp9/common/vp9_entropymode.h"
21
22
23
24
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
25
26
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
27
28
29
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"

Dmitry Kovalev's avatar
Dmitry Kovalev committed
30
#include "vp9/encoder/vp9_cost.h"
31
#include "vp9/encoder/vp9_encodemb.h"
32
#include "vp9/encoder/vp9_encodemv.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
33
#include "vp9/encoder/vp9_encoder.h"
34
#include "vp9/encoder/vp9_mcomp.h"
35
#include "vp9/encoder/vp9_quantize.h"
36
#include "vp9/encoder/vp9_ratectrl.h"
37
#include "vp9/encoder/vp9_rd.h"
38
39
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
Paul Wilkins's avatar
Paul Wilkins committed
40

41
42
// NOTE(review): these look like tuning constants for adaptive RD threshold
// scaling (an upper bound on the factor and its per-step increment). Their
// users are not visible in this part of the file -- confirm before changing.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1
43

44
45
46
47
48
49
50
51
// Reference-frame bitmasks: bit (1 << frame) stands for one
// MV_REFERENCE_FRAME value. Each *_MODE_MASK below has a bit set for every
// reference frame EXCEPT the one it is named after.
// NOTE(review): presumably used to disable modes that use the other
// reference frames -- the users are not visible here, confirm.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// Bit for ALTREF_FRAME plus bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME -- confirm
// against the MV_REFERENCE_FRAME definition.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
52

Paul Wilkins's avatar
Paul Wilkins committed
53
54
// NOTE(review): presumably the lowest mode-order index at which early
// termination of the mode search may kick in; the user of this constant is
// not visible in this part of the file -- confirm.
#define MIN_EARLY_TERM_INDEX    3

55
typedef struct {
56
  PREDICTION_MODE mode;
57
58
59
60
61
62
63
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference search order: a pair of reference frames.
// ref_frame[1] is NONE when the entry uses a single reference.
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

Alex Converse's avatar
Alex Converse committed
64
65
66
67
68
69
70
71
72
73
74
75
76
struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int rate;
  int64_t dist;
  int64_t sse;
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int skip;
77
  int use_fast_coef_costing;
78
  const scan_order *so;
Alex Converse's avatar
Alex Converse committed
79
80
};

81
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
82
83
84
85
86
87
88
89
90
91
92
93
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
Jingning Han's avatar
Jingning Han committed
94
95
96
97
98
99
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
121
122
};

123
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
124
125
126
127
128
129
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
John Koleszar's avatar
John Koleszar committed
130
131
};

132
133
134
135
136
137
138
139
140
// Returns the offset, in elements, of 4x4 block number `raster_block`
// (raster order within a plane block of size `plane_bsize`) inside a buffer
// with the given row stride.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  // log2 of the plane block width as returned by b_width_log2().
  const int log2_w = b_width_log2(plane_bsize);
  const int row = raster_block >> log2_w;
  const int col = raster_block & ((1 << log2_w) - 1);
  // Each raster block is 4 samples wide and 4 samples tall.
  return (4 * row) * stride + (4 * col);
}
// Returns a pointer to 4x4 block number `raster_block` inside an int16_t
// buffer whose row stride equals the width of `plane_bsize` in samples.
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  // Buffer stride: number of 4x4 columns in the block times 4 samples.
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset = raster_block_offset(plane_bsize, raster_block,
                                         row_stride);
  return base + offset;
}

145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// Swaps coefficient buffer slots `m` and `n` of `ctx` for planes
// [min_plane, max_plane). As a side effect the macroblock's active
// coeff/qcoeff/dqcoeff/eobs pointers are left pointing at the buffers that
// were in slot `m` before the swap (i.e. the ones now stored in slot `n`).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                           int m, int n, int min_plane, int max_plane) {
  int plane = min_plane;

  while (plane < max_plane) {
    struct macroblock_plane *const enc = &x->plane[plane];
    struct macroblockd_plane *const dec = &x->e_mbd.plane[plane];

    // Transform coefficients.
    enc->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = enc->coeff;

    // Quantized coefficients.
    enc->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = enc->qcoeff;

    // Dequantized coefficients (held by the decoder-side plane struct).
    dec->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = dec->dqcoeff;

    // End-of-block positions.
    enc->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = enc->eobs;

    ++plane;
  }
}

170
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
Deb Mukherjee's avatar
Deb Mukherjee committed
171
172
173
174
175
                            MACROBLOCK *x, MACROBLOCKD *xd,
                            int *out_rate_sum, int64_t *out_dist_sum) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
176
177
178
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
hkuang's avatar
hkuang committed
179
  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
180
  unsigned int sse;
181
  unsigned int var = 0;
182
  unsigned int sum_sse = 0;
183
  const int shift = 8;
184
185
186
187
  int rate;
  int64_t dist;

  x->pred_sse[ref] = 0;
Deb Mukherjee's avatar
Deb Mukherjee committed
188
189
190
191

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
192
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
193
194
195
196
197
198
199
200
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

201
    sum_sse = 0;
202
203
204
205
206

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
        int block_idx = (idy << 1) + idx;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
                                        dst, pd->dst.stride, &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        if (!x->select_tx_size) {
          if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift)
            x->skip_txfm[(i << 2) + block_idx] = 1;
          else if (var < p->quant_thred[1] >> shift)
            x->skip_txfm[(i << 2) + block_idx] = 2;
          else
            x->skip_txfm[(i << 2) + block_idx] = 0;
        }
222
223
224
225
226

        if (i == 0)
          x->pred_sse[ref] += sse;
      }
    }
227

228
    // Fast approximate the modelling function.
229
    if (cpi->oxcf.speed > 4) {
230
      int64_t rate;
231
232
      int64_t square_error = sse;
      int quantizer = (pd->dequant[1] >> 3);
233
234
235
236
237
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        quantizer >>= (xd->bd - 8);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
238

239
240
      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> 8;
241
242
243
244
245
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
246
    } else {
247
248
249
250
251
252
253
254
255
256
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                     pd->dequant[1] >> (xd->bd - 5),
                                     &rate, &dist);
      } else {
        vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
                                     pd->dequant[1] >> 3, &rate, &dist);
      }
#else
257
      vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
258
                                   pd->dequant[1] >> 3, &rate, &dist);
259
#endif  // CONFIG_VP9_HIGHBITDEPTH
260
      rate_sum += rate;
261
      dist_sum += dist;
262
    }
Deb Mukherjee's avatar
Deb Mukherjee committed
263
264
  }

265
266
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
Deb Mukherjee's avatar
Deb Mukherjee committed
267
268
}

269
// Returns the sum of squared differences between `coeff` and `dqcoeff`
// over `block_size` coefficients, and stores the sum of squared `coeff`
// values in *ssz.
//
// All products are formed in 64 bits so that large coefficients cannot
// overflow `int` before being accumulated.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308

#if CONFIG_VP9_HIGHBITDEPTH
// High bit depth variant of the block error: returns the sum of squared
// differences between `coeff` and `dqcoeff` over `block_size` coefficients
// and stores the sum of squared `coeff` values in *ssz. Both sums are
// rounded and right-shifted by 2 * (bd - 8).
int64_t vp9_high_block_error_c(const tran_low_t *coeff,
                               const tran_low_t *dqcoeff,
                               intptr_t block_size,
                               int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  // Half of the divisor implied by `shift`, for round-to-nearest.
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t error = 0;
  int64_t sqcoeff = 0;
  int k = 0;

  while (k < block_size) {
    const int64_t diff = coeff[k] - dqcoeff[k];
    const int64_t c = coeff[k];
    error += diff * diff;
    sqcoeff += c * c;
    ++k;
  }
  assert(error >= 0 && sqcoeff >= 0);

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

309
310
311
312
313
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
314
static const int16_t band_counts[TX_SIZES][8] = {
315
316
317
318
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
319
};
320
static INLINE int cost_coeffs(MACROBLOCK *x,
321
                              int plane, int block,
322
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
John Koleszar's avatar
John Koleszar committed
323
                              TX_SIZE tx_size,
324
325
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
326
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
327
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
328
329
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
330
  const PLANE_TYPE type = pd->plane_type;
331
  const int16_t *band_count = &band_counts[tx_size][1];
332
  const int eob = p->eobs[block];
333
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
334
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
Dmitry Kovalev's avatar
Dmitry Kovalev committed
335
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
336
  uint8_t token_cache[32 * 32];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
337
  int pt = combine_entropy_contexts(*A, *L);
338
  int c, cost;
339
  // Check for consistency of tx_size with mode info
340
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
341
                              : get_uv_tx_size(mbmi, pd) == tx_size);
342

343
344
  if (eob == 0) {
    // single eob token
345
    cost = token_costs[0][0][pt][EOB_TOKEN];
346
    c = 0;
347
  } else {
348
    int band_left = *band_count++;
349
350

    // dc token
Dmitry Kovalev's avatar
Dmitry Kovalev committed
351
    int v = qcoeff[0];
352
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
353
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
354
    token_cache[0] = vp9_pt_energy_class[prev_t];
355
    ++token_costs;
356
357
358
359

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
360
      int t;
361

Dmitry Kovalev's avatar
Dmitry Kovalev committed
362
      v = qcoeff[rc];
363
      t = vp9_dct_value_tokens_ptr[v].token;
364
365
366
      if (use_fast_coef_costing) {
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
367
        pt = get_coef_context(nb, token_cache, c);
368
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
369
        token_cache[rc] = vp9_pt_energy_class[t];
370
      }
371
      prev_t = t;
372
      if (!--band_left) {
373
374
        band_left = *band_count++;
        ++token_costs;
375
      }
376
    }
377
378

    // eob token
379
    if (band_left) {
380
381
382
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
383
        pt = get_coef_context(nb, token_cache, c);
384
385
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
386
    }
387
388
  }

389
  // is eob first coefficient;
390
  *A = *L = (c > 0);
391

392
393
  return cost;
}
394
395
396
397
398

#if CONFIG_VP9_HIGHBITDEPTH
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args, int bd) {
#else
Alex Converse's avatar
Alex Converse committed
399
400
static void dist_block(int plane, int block, TX_SIZE tx_size,
                       struct rdcost_block_args* args) {
401
#endif  // CONFIG_VP9_HIGHBITDEPTH
402
  const int ss_txfrm_size = tx_size << 1;
Deb Mukherjee's avatar
Deb Mukherjee committed
403
404
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
405
406
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
Deb Mukherjee's avatar
Deb Mukherjee committed
407
  int64_t this_sse;
Alex Converse's avatar
Alex Converse committed
408
  int shift = tx_size == TX_32X32 ? 0 : 2;
409
410
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
411
412
413
414
#if CONFIG_VP9_HIGHBITDEPTH
  args->dist = vp9_high_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                    &this_sse, bd) >> shift;
#else
415
416
  args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                               &this_sse) >> shift;
417
#endif  // CONFIG_VP9_HIGHBITDEPTH
418
  args->sse  = this_sse >> shift;
419

hkuang's avatar
hkuang committed
420
  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
421
422
    // TODO(jingning): tune the model to better capture the distortion.
    int64_t p = (pd->dequant[1] * pd->dequant[1] *
423
                    (1 << ss_txfrm_size)) >> (shift + 2);
424
425
426
427
428
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      p >>= ((xd->bd - 8) * 2);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
429
430
    args->dist += (p >> 4);
    args->sse  += p;
431
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
432
433
}

434
static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
Alex Converse's avatar
Alex Converse committed
435
                       TX_SIZE tx_size, struct rdcost_block_args* args) {
Deb Mukherjee's avatar
Deb Mukherjee committed
436
  int x_idx, y_idx;
Alex Converse's avatar
Alex Converse committed
437
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
Deb Mukherjee's avatar
Deb Mukherjee committed
438

439
  args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
Alex Converse's avatar
Alex Converse committed
440
                           args->t_left + y_idx, tx_size,
441
442
                           args->so->scan, args->so->neighbors,
                           args->use_fast_coef_costing);
Deb Mukherjee's avatar
Deb Mukherjee committed
443
444
}

445
446
// Runs dist_block() with the bit depth argument the current build expects:
// the frame's bit depth for high bit depth frames, 8 otherwise.
static void dist_block_for_frame_depth(int plane, int block, TX_SIZE tx_size,
                                       struct rdcost_block_args *args) {
#if CONFIG_VP9_HIGHBITDEPTH
  const MACROBLOCKD *const xd = &args->x->e_mbd;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    dist_block(plane, block, tx_size, args, xd->bd);
  } else {
    dist_block(plane, block, tx_size, args, 8);
  }
#else
  dist_block(plane, block, tx_size, args);
#endif  // CONFIG_VP9_HIGHBITDEPTH
}

// Per-transform-block callback: computes rate, distortion and SSE of one
// transform block and accumulates them into `arg` (a struct
// rdcost_block_args).
//
// Intra blocks are fully encoded. Inter blocks at the largest transform
// size honour x->skip_txfm[]: 0 = full transform and quantization,
// 2 = DC coefficient only, any other value = transform skipped. Other inter
// blocks are always fully transformed and quantized.
//
// Sets args->skip, which makes subsequent calls return immediately, once
// the accumulated RD cost exceeds args->best_rd.
static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
                          TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  int64_t rd1, rd2, rd;

  if (args->skip)
    return;

  if (!is_inter_block(mbmi)) {
    vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
    dist_block_for_frame_depth(plane, block, tx_size, args);
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // Index of this transform block in the per-unit bsse / skip_txfm tables.
    const int unit = (plane << 2) + (block >> (tx_size << 1));

    if (x->skip_txfm[unit] == 0) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      dist_block_for_frame_depth(plane, block, tx_size, args);
    } else if (x->skip_txfm[unit] == 2) {
      // compute DC coefficient
      tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      args->sse  = x->bsse[unit] << 4;
      args->dist = args->sse;
      // Only a coded DC coefficient reduces the distortion below the SSE.
      // With eob == 0 the dequantized DC is zero and there is nothing to
      // subtract.
      if (x->plane[plane].eobs[block]) {
        const int64_t dc = coeff[0];
        const int64_t dc_err = dc - dqcoeff[0];
        int64_t dc_correct = dc * dc - dc_err * dc_err;
        // Same scaling as dist_block(): no shift for 32x32 transforms.
        if (tx_size != TX_32X32)
          dc_correct >>= 2;
        args->dist = args->sse - dc_correct;
        if (args->dist < 0)
          args->dist = 0;
      }
    } else {
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      args->sse  = x->bsse[unit] << 4;
      args->dist = args->sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
    dist_block_for_frame_depth(plane, block, tx_size, args);
  }

  rate_block(plane, block, plane_bsize, tx_size, args);
  // rd1: cost of coding the coefficients; rd2: cost of zeroing the block.
  rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = MIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
                                    (rd1 > rd2 && !xd->lossless);

  args->this_rate += args->rate;
  args->this_dist += args->dist;
  args->this_sse  += args->sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd)
    args->skip = 1;
}

531
static void txfm_rd_in_plane(MACROBLOCK *x,
532
533
534
                             int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane,
535
536
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
Deb Mukherjee's avatar
Deb Mukherjee committed
537
  MACROBLOCKD *const xd = &x->e_mbd;
538
  const struct macroblockd_plane *const pd = &xd->plane[plane];
539
540
  struct rdcost_block_args args;
  vp9_zero(args);
541
542
  args.x = x;
  args.best_rd = ref_best_rd;
543
  args.use_fast_coef_costing = use_fast_coef_casting;
544

545
  if (plane == 0)
hkuang's avatar
hkuang committed
546
    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
547

548
  vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
549

550
  args.so = get_scan(xd, tx_size, pd->plane_type, 0);
Deb Mukherjee's avatar
Deb Mukherjee committed
551

552
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
553
554
                                         block_rd_txfm, &args);
  if (args.skip) {
555
556
557
558
559
    *rate       = INT_MAX;
    *distortion = INT64_MAX;
    *sse        = INT64_MAX;
    *skippable  = 0;
  } else {
560
561
562
    *distortion = args.this_dist;
    *rate       = args.this_rate;
    *sse        = args.this_sse;
563
    *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
564
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
565
566
}

567
568
569
570
571
// Picks the largest transform size allowed by both the block size and the
// frame's transform mode, stores it in the block's mode info and computes
// the luma rate/distortion/skip/SSE for it.
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion,
                                   int *skip, int64_t *sse,
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  // Limit imposed by the block size.
  const TX_SIZE block_limit = max_txsize_lookup[bs];
  // Limit imposed by the frame-level transform mode.
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cm->tx_mode];

  mbmi->tx_size = MIN(block_limit, mode_limit);

  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd,
                   0 /* plane */, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}

585
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
586
587
588
589
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
590
                                   int64_t tx_cache[TX_MODES],
591
                                   int64_t ref_best_rd,
592
                                   BLOCK_SIZE bs) {
593
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
594
595
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
596
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
597
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
598
599
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
600
601
602
603
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
604
  int n, m;
605
  int s0, s1;
606
607
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
608
  TX_SIZE best_tx = max_tx_size;
609

610
  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
611
612
613
  assert(skip_prob > 0);
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);
614

615
  for (n = max_tx_size; n >= 0;  n--) {
616
617
618
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
619
620
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
621
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
622
623
624
625
626
627
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
628
629
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
630
    } else if (s[n]) {
631
632
633
634
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
635
    }
636

637
638
639
    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
640
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
641
642
643
        s[n] == 1))
      break;

644
645
646
647
    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
648
  }
649
650
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);
651
652


653
654
655
  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
656
  *psse       = sse[mbmi->tx_size];
657

658
659
660
661
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
662

663
664
665
666
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
667
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
668
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
Deb Mukherjee's avatar
Deb Mukherjee committed
669
  } else {
670
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
671
  }
Deb Mukherjee's avatar
Deb Mukherjee committed
672
}
673

674
675
676
677
678
// Computes the luma rate/distortion of a block, choosing the transform
// size either directly (largest allowed) or by RD search depending on the
// speed feature settings and lossless mode. `psse` may be NULL when the
// caller does not need the SSE. txfm_cache[] is zeroed when the largest
// transform size is used directly, otherwise it is filled by the RD search.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip,
                            int64_t *psse, BLOCK_SIZE bs,
                            int64_t txfm_cache[TX_MODES],
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  // Scratch destination used when the caller passed no SSE pointer.
  int64_t sse;
  int64_t *ret_sse = &sse;
  const int use_largest =
      cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless;

  if (psse)
    ret_sse = psse;

  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);

  if (!use_largest) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
                           txfm_cache, ref_best_rd, bs);
    return;
  }

  vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
  choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                         bs);
}

695
696
// Returns 1 when the oblique intra mode `mode` should be skipped because
// the best intra mode found so far is not one of the two modes associated
// with it below; returns 0 otherwise (including for every mode not listed).
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return !(best_intra_mode == V_PRED || best_intra_mode == D135_PRED);
    case D63_PRED:
      return !(best_intra_mode == V_PRED || best_intra_mode == D45_PRED);
    case D207_PRED:
      return !(best_intra_mode == H_PRED || best_intra_mode == D45_PRED);
    case D153_PRED:
      return !(best_intra_mode == H_PRED || best_intra_mode == D135_PRED);
    default:
      return 0;
  }
}

716
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
717
                                     PREDICTION_MODE *best_mode,
718
                                     const int *bmode_costs,
719
720
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
Ronald S. Bultje's avatar
Ronald S. Bultje committed
721
                                     int64_t *bestdistortion,
722
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
723
  PREDICTION_MODE mode;
724
  MACROBLOCKD *const xd = &x->e_mbd;
725
  int64_t best_rd = rd_thresh;
726

727
728
729
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
730
  const int dst_stride = pd->dst.stride;
731
732
733
734
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
735
736
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
737

Jim Bankoski's avatar
Jim Bankoski committed
738
739
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
740
  int idx, idy;
741
  uint8_t best_dst[8 * 8];
742
743
744
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
John Koleszar's avatar
John Koleszar committed
745

Jingning Han's avatar
Jingning Han committed
746
  assert(ib < 4);
747

748
749
  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
hkuang's avatar
hkuang committed
750
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
751

752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
        continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode))
            continue;
      }

      vpx_memcpy(tempa, ta, sizeof(ta));
      vpx_memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = ib + idy * 2 + idx;
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                              p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0].src_mi->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, block, 1,
                                  TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride,
                                  dst, dst_stride, idx, idy, 0);
          vp9_high_subtract_block(4, 4, src_diff, 8, src, src_stride,
                                  dst, dst_stride, xd->bd);
          if (xd->lossless) {
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
            vp9_high_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_high_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block),
                                 dst, dst_stride,
                                 p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
            vp9_high_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            distortion += vp9_high_block_error(coeff,
                                               BLOCK_OFFSET(pd->dqcoeff, block),
                                               16, &unused, xd->bd) >> 2;
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_high_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                dst, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        vpx_memcpy(a, tempa, sizeof(tempa));
        vpx_memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          vpx_memcpy(best_dst16 + idy * 8,
                     CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                     num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd:
      {}
    }
    if (best_rd >= rd_thresh || x->skip_encode)
      return best_rd;

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      vpx_memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 best_dst16 + idy * 8,
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

854
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
855
    int64_t this_rd;
856
    int ratey = 0;
857
858
    int64_t distortion = 0;
    int rate = bmode_costs[mode];
859

860
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
861
862
      continue;

863
864
    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
865
866
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
867
868
          continue;
    }
869

870
871
872
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

Jim Bankoski's avatar
Jim Bankoski committed
873
874
    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
875
        const int block = ib + idy * 2 + idx;
876
877
878
879
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
880
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
hkuang's avatar
hkuang committed
881
        xd->mi[0].src_mi->bmi[block].as_mode = mode;
882
        vp9_predict_intra_block(xd, block, 1,
883
                                TX_4X4, mode,
884
885
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
886
                                dst, dst_stride, idx, idy, 0);
887
888
889
890
891
892
893
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
894
895
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
896
897
898
899
900
901
902
903
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
904
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
905
906
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
907
908
                             so->scan, so->neighbors,
                             cpi->sf.use_fast_coef_costing);
909
910
911
912
913
914
915
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
916
917
      }
    }
Jingning Han's avatar
Jingning Han committed
918

919
920
    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
Jingning Han's avatar
Jingning Han committed
921

922
923
924
925
926
927
    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
928
929
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
930
931
932
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
John Koleszar's avatar
John Koleszar committed
933
    }
934
935
  next:
    {}
John Koleszar's avatar
John Koleszar committed
936
  }
937

938
  if (best_rd >= rd_thresh || x->skip_encode)
939
940
    return best_rd;

941
942
943
  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);
John Koleszar's avatar
John Koleszar committed
944

John Koleszar's avatar
John Koleszar committed
945
  return best_rd;
John Koleszar's avatar
John Koleszar committed
946
947
}

948
949
950
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
951
                                            int64_t best_rd) {