rdopt.c 150 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"

17
#include "vpx_dsp/vpx_dsp_common.h"
Jingning Han's avatar
Jingning Han committed
18
19
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
20
#include "vpx_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
21

22
23
24
25
26
27
28
29
30
31
32
#include "vp10/common/common.h"
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/idct.h"
#include "vp10/common/mvref_common.h"
#include "vp10/common/pred_common.h"
#include "vp10/common/quant_common.h"
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
#include "vp10/common/scan.h"
#include "vp10/common/seg_common.h"
Jingning Han's avatar
Jingning Han committed
33

34
35
36
37
38
39
40
41
42
43
#include "vp10/encoder/cost.h"
#include "vp10/encoder/encodemb.h"
#include "vp10/encoder/encodemv.h"
#include "vp10/encoder/encoder.h"
#include "vp10/encoder/mcomp.h"
#include "vp10/encoder/quantize.h"
#include "vp10/encoder/ratectrl.h"
#include "vp10/encoder/rd.h"
#include "vp10/encoder/rdopt.h"
#include "vp10/encoder/aq_variance.h"
Jingning Han's avatar
Jingning Han committed
44

clang-format's avatar
clang-format committed
45
46
47
48
49
50
/* Reference-frame bit masks (bit index == reference frame value). Each
 * *_MODE_MASK below holds the bits of every one of INTRA/LAST/GOLDEN/ALTREF
 * except the frame it is named after. */
#define LAST_FRAME_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << ALTREF_FRAME))
#define ALT_REF_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << GOLDEN_FRAME))

/* Bit 0 together with the ALTREF_FRAME bit. */
#define SECOND_REF_FRAME_MASK (0x01 | (1 << ALTREF_FRAME))

#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8

/* Scale applied to the best RD cost in the transform-type search while the
 * current best type is still DCT_DCT: a candidate only replaces it when its
 * cost is below ext_tx_th * best cost. */
const double ext_tx_th = 0.99;

Jingning Han's avatar
Jingning Han committed
59
60
61
62
63
/* One entry of the mode search order: a prediction mode together with the
 * reference frame pair it uses (second slot is NONE for single reference and
 * intra entries). */
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

/* One entry of the reference search order: a reference frame pair only. */
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

/* Accumulator handed to block_rd_txfm() for every transform block of a plane
 * while txfm_rd_in_plane() walks that plane. */
struct rdcost_block_args {
  MACROBLOCK *x;               /* macroblock being evaluated */
  ENTROPY_CONTEXT t_above[16]; /* above contexts, updated by cost_coeffs() */
  ENTROPY_CONTEXT t_left[16];  /* left contexts, updated by cost_coeffs() */
  int this_rate;               /* running sum of per-block rate */
  int64_t this_dist;           /* running sum of per-block distortion */
  int64_t this_sse;            /* running sum of per-block SSE */
  int64_t this_rd;             /* running sum of per-block RD cost */
  int64_t best_rd;             /* RD budget; exceeding it aborts the walk */
  int exit_early;              /* set to 1 once best_rd has been exceeded */
  int use_fast_coef_costing;   /* forwarded to cost_coeffs() */
  const scan_order *so;        /* scan/neighbor tables used for costing */
  uint8_t skippable;           /* stays 1 only while every block has eob 0 */
};

#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
clang-format's avatar
clang-format committed
83
84
85
  { NEARESTMV, { LAST_FRAME, NONE } },
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
86

clang-format's avatar
clang-format committed
87
  { DC_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
88

clang-format's avatar
clang-format committed
89
90
91
  { NEWMV, { LAST_FRAME, NONE } },
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
92

clang-format's avatar
clang-format committed
93
94
95
  { NEARMV, { LAST_FRAME, NONE } },
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
96

clang-format's avatar
clang-format committed
97
98
99
  { ZEROMV, { LAST_FRAME, NONE } },
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
100

clang-format's avatar
clang-format committed
101
102
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
Jingning Han's avatar
Jingning Han committed
103

clang-format's avatar
clang-format committed
104
  { TM_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
105

clang-format's avatar
clang-format committed
106
107
108
109
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
Jingning Han's avatar
Jingning Han committed
110

clang-format's avatar
clang-format committed
111
112
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
Jingning Han's avatar
Jingning Han committed
113

clang-format's avatar
clang-format committed
114
115
116
117
118
119
120
121
  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
122
123
124
};

static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {
clang-format's avatar
clang-format committed
125
126
127
  { { LAST_FRAME, NONE } },           { { GOLDEN_FRAME, NONE } },
  { { ALTREF_FRAME, NONE } },         { { LAST_FRAME, ALTREF_FRAME } },
  { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
128
129
};

clang-format's avatar
clang-format committed
130
131
// Exchanges buffer slots |m| and |n| of |ctx| for planes
// [min_plane, max_plane). On return the plane structures of |x| point at the
// buffers that were in slot |m| on entry (now stored in slot |n|).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n,
                           int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const enc = &x->plane[plane];
    struct macroblockd_plane *const dec = &x->e_mbd.plane[plane];

    // Transform coefficients.
    enc->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = enc->coeff;

    // Quantized coefficients.
    enc->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = enc->qcoeff;

    // Dequantized coefficients.
    dec->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = dec->dqcoeff;

    // End-of-block positions.
    enc->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = enc->eobs;
  }
}

clang-format's avatar
clang-format committed
155
156
157
158
// Model-based rate/distortion estimate for the current prediction of a block.
//
// Every plane is split into units of the largest transform size allowed for
// that plane. The variance function of each unit supplies its SSE and
// variance, which are recorded in x->bsse[] / x->skip_txfm[]. The per-plane
// SSE is then turned into a rate and distortion estimate, either with a cheap
// linear approximation (cpi->sf.simple_model_rd_from_var) or with
// vp10_model_rd_from_var_lapndz().
//
// Outputs:
//   *out_rate_sum - rate estimate summed over all planes.
//   *out_dist_sum - distortion estimate summed over all planes, times 16.
//   *skip_txfm_sb - 1 only if every unit of every plane was classified as
//                   low-error, otherwise 0.
//   *skip_sse_sb  - SSE summed over all planes, times 16.
// Side effects: writes x->bsse[], x->skip_txfm[] and x->pred_sse[ref].
static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                            MACROBLOCKD *xd, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  unsigned int sum_sse = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int rate;
  int64_t dist;
  const int dequant_shift =
#if CONFIG_VPX_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif  // CONFIG_VPX_HIGHBITDEPTH
                                                    3;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    // Number of units across and down. Each dimension uses its own lookup
    // table so the result does not rely on the unit being square.
    const int bw =
        1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    const int bh =
        1 << (b_height_log2_lookup[bs] - b_height_log2_lookup[unit_size]);
    // log2 of the unit width / height in pixels.
    const int lw = b_width_log2_lookup[unit_size] + 2;
    const int lh = b_height_log2_lookup[unit_size] + 2;
    int idx, idy;

    sum_sse = 0;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        // Row offset is scaled by the unit height, column offset by the unit
        // width, for both source and prediction.
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lw);
        const int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst, pd->dst.stride,
                                        &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        // A single unit that is not low-error clears the block-level flag.
        if (skip_flag && !low_err_skip) skip_flag = 0;

        // Only the first plane contributes to the per-reference SSE.
        if (i == 0) x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;

    // Fast approximate the modelling function.
    if (cpi->sf.simple_model_rd_from_var) {
      int64_t plane_rate;
      const int64_t square_error = sum_sse;
      const int quantizer = (pd->dequant[1] >> dequant_shift);

      if (quantizer < 120)
        plane_rate = (square_error * (280 - quantizer)) >> 8;
      else
        plane_rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += plane_rate;
      dist_sum += dist;
    } else {
      vp10_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
                                    pd->dequant[1] >> dequant_shift, &rate,
                                    &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

// Reference C implementation of the coefficient-domain block error.
//
// Returns the sum over |block_size| entries of (coeff[i] - dqcoeff[i])^2 and
// stores the sum of coeff[i]^2 in *ssz.
//
// All arithmetic is done in 64 bits: squaring in plain int overflows (which is
// undefined behavior) once a difference or coefficient exceeds 46340 in
// magnitude.
int64_t vp10_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

// Reference C implementation of the block error for 16-bit coefficients.
//
// Returns the sum over |block_size| entries of (coeff[i] - dqcoeff[i])^2.
//
// The square is computed in 64 bits: two int16_t values can differ by up to
// 65535, and 65535 * 65535 does not fit in a 32-bit int (signed overflow is
// undefined behavior).
int64_t vp10_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                              int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

294
#if CONFIG_VPX_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
295
// High bit depth variant of the block error. Accumulates the squared
// coefficient differences and the squared coefficients over |block_size|
// entries, then scales both sums down by 2 * (bd - 8) bits with rounding.
// Returns the scaled error; the scaled coefficient energy goes to *ssz.
int64_t vp10_highbd_block_error_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  intptr_t block_size, int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (shift > 0) ? (1 << (shift - 1)) : 0;
  int64_t sum_sq_err = 0;
  int64_t sum_sq_coeff = 0;
  int k;

  for (k = 0; k < block_size; k++) {
    const int64_t delta = coeff[k] - dqcoeff[k];
    const int64_t c = coeff[k];

    sum_sq_err += delta * delta;
    sum_sq_coeff += c * c;
  }
  assert(sum_sq_err >= 0 && sum_sq_coeff >= 0);

  *ssz = (sum_sq_coeff + rounding) >> shift;
  return (sum_sq_err + rounding) >> shift;
}
315
#endif  // CONFIG_VPX_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
316
317
318
319
320
321
322

/* Number of coefficients in each coefficient band, per transform size.
 * Every row ends in a 0 terminator. cost_coeffs() reads the next count when a
 * band is exhausted; hitting the terminator means the last coefficient of the
 * transform block (e.g. the 16th in a 4x4 or the 64th in an 8x8) was
 * non-zero, in which case no trailing EOB token is costed. */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4, 3, 16 - 13, 0 },
  { 1, 2, 3, 4, 11, 64 - 21, 0 },
  { 1, 2, 3, 4, 11, 256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
clang-format's avatar
clang-format committed
328
329
330
// Returns the token cost of the quantized coefficients of one transform
// block. |A| and |L| are the above/left entropy contexts of the block; both
// are overwritten with 1 if the block has any coded coefficient and 0
// otherwise. With |use_fast_coef_costing| the context of each AC token is
// derived from the previous token only, instead of from the neighbor table
// |nb|.
static int cost_coeffs(MACROBLOCK *x, int plane, int block, ENTROPY_CONTEXT *A,
                       ENTROPY_CONTEXT *L, TX_SIZE tx_size, const int16_t *scan,
                       const int16_t *nb, int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  // Band lengths are consumed starting at the second table entry.
  const int16_t *next_band_len = &band_counts[tx_size][1];
  // Points at the cost table of the current band.
  unsigned int(*band_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t energy_cache[32 * 32];
  int ctx = combine_entropy_contexts(*A, *L);
  int pos, total;
  int band_left, v;
  int16_t prev_tok;
  EXTRABIT e;
#if CONFIG_VPX_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);

  if (eob == 0) {
    // Empty block: a single EOB token, and the contexts become 0.
    *A = *L = 0;
    return band_costs[0][0][ctx][EOB_TOKEN];
  }

  band_left = *next_band_len++;

  // DC token.
  v = qcoeff[0];
  vp10_get_token_extra(v, &prev_tok, &e);
  total = (*band_costs)[0][ctx][prev_tok] +
          vp10_get_cost(prev_tok, e, cat6_high_cost);

  energy_cache[0] = vp10_pt_energy_class[prev_tok];
  ++band_costs;

  // AC tokens, in scan order.
  for (pos = 1; pos < eob; pos++) {
    const int rc = scan[pos];
    int16_t tok;

    v = qcoeff[rc];
    vp10_get_token_extra(v, &tok, &e);
    if (use_fast_coef_costing) {
      total += (*band_costs)[!prev_tok][!prev_tok][tok] +
               vp10_get_cost(tok, e, cat6_high_cost);
    } else {
      ctx = get_coef_context(nb, energy_cache, pos);
      total += (*band_costs)[!prev_tok][ctx][tok] +
               vp10_get_cost(tok, e, cat6_high_cost);
      energy_cache[rc] = vp10_pt_energy_class[tok];
    }
    prev_tok = tok;

    // Move on to the next band once this one is used up.
    if (!--band_left) {
      band_left = *next_band_len++;
      ++band_costs;
    }
  }

  // EOB token; not costed when the band table terminator was reached.
  if (band_left) {
    if (use_fast_coef_costing) {
      total += (*band_costs)[0][!prev_tok][EOB_TOKEN];
    } else {
      ctx = get_coef_context(nb, energy_cache, pos);
      total += (*band_costs)[0][ctx][EOB_TOKEN];
    }
  }

  // is eob first coefficient;
  *A = *L = (pos > 0);

  return total;
}

// Computes the distortion and SSE of one transform block from its transform
// coefficients and dequantized coefficients. Both results are shifted right
// by 2 for every transform size except TX_32X32.
static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // 16 coefficients for tx_size 0, four times more per size step.
  const int num_coeffs = 16 << (tx_size << 1);
  const int shift = (tx_size == TX_32X32) ? 0 : 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int64_t raw_sse;
  int64_t raw_dist;
#if CONFIG_VPX_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;

  raw_dist = vp10_highbd_block_error(coeff, dqcoeff, num_coeffs, &raw_sse, bd);
#else
  raw_dist = vp10_block_error(coeff, dqcoeff, num_coeffs, &raw_sse);
#endif  // CONFIG_VPX_HIGHBITDEPTH

  *out_dist = raw_dist >> shift;
  *out_sse = raw_sse >> shift;
}

434
static int rate_block(int plane, int block, int blk_row, int blk_col,
clang-format's avatar
clang-format committed
435
                      TX_SIZE tx_size, struct rdcost_block_args *args) {
436
  return cost_coeffs(args->x, plane, block, args->t_above + blk_col,
clang-format's avatar
clang-format committed
437
438
                     args->t_left + blk_row, tx_size, args->so->scan,
                     args->so->neighbors, args->use_fast_coef_costing);
Jingning Han's avatar
Jingning Han committed
439
440
}

441
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
clang-format's avatar
clang-format committed
442
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
Jingning Han's avatar
Jingning Han committed
443
444
445
446
447
448
449
450
451
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

clang-format's avatar
clang-format committed
452
  if (args->exit_early) return;
Jingning Han's avatar
Jingning Han committed
453
454

  if (!is_inter_block(mbmi)) {
clang-format's avatar
clang-format committed
455
456
457
    struct encode_b_args arg = { x, NULL, &mbmi->skip };
    vp10_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize,
                            tx_size, &arg);
Jingning Han's avatar
Jingning Han committed
458
459
460
461
462
    dist_block(x, plane, block, tx_size, &dist, &sse);
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
clang-format's avatar
clang-format committed
463
      vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
Jingning Han's avatar
Jingning Han committed
464
465
466
467
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
clang-format's avatar
clang-format committed
468
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
Jingning Han's avatar
Jingning Han committed
469
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
clang-format's avatar
clang-format committed
470
471
472
      vp10_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize,
                          tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
Jingning Han's avatar
Jingning Han committed
473
474
475
476
477
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
478
#if CONFIG_VPX_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
479
480
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
clang-format's avatar
clang-format committed
481
        if (tx_size != TX_32X32) dc_correct >>= 2;
Jingning Han's avatar
Jingning Han committed
482

483
        dist = VPXMAX(0, sse - dc_correct);
Jingning Han's avatar
Jingning Han committed
484
485
486
487
488
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
clang-format's avatar
clang-format committed
489
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
Jingning Han's avatar
Jingning Han committed
490
491
492
493
      dist = sse;
    }
  } else {
    // full forward transform and quantization
494
    vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
Jingning Han's avatar
Jingning Han committed
495
496
497
498
499
500
501
502
503
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

504
  rate = rate_block(plane, block, blk_row, blk_col, tx_size, args);
Jingning Han's avatar
Jingning Han committed
505
506
507
508
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
509
  rd = VPXMIN(rd1, rd2);
Jingning Han's avatar
Jingning Han committed
510
  if (plane == 0)
clang-format's avatar
clang-format committed
511
512
    x->zcoeff_blk[tx_size][block] =
        !x->plane[plane].eobs[block] ||
513
        (rd1 > rd2 && !xd->lossless[mbmi->segment_id]);
Jingning Han's avatar
Jingning Han committed
514
515
516
517
518
519
520
521
522
523
524
525
526
527

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}

clang-format's avatar
clang-format committed
528
529
530
// Runs block_rd_txfm() over every transform block of |plane| at |tx_size| and
// reports the accumulated rate, distortion, SSE and skippable flag. If the
// running RD cost exceeds |ref_best_rd| the walk is abandoned and the outputs
// are set to INT_MAX / INT64_MAX with *skippable = 0. For plane 0 the mode
// info's tx_size is set to |tx_size| first.
static void txfm_rd_in_plane(MACROBLOCK *x, int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse, int64_t ref_best_rd,
                             int plane, BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;

  vp10_zero(args);
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  args.skippable = 1;

  if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;

  vp10_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
  args.so = get_scan(tx_size, get_tx_type(pd->plane_type, xd, 0));

  vp10_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                          &args);

  if (args.exit_early) {
    // Budget exceeded: report sentinel values.
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
    return;
  }

  *rate = args.this_rate;
  *distortion = args.this_dist;
  *sse = args.this_sse;
  *skippable = args.skippable;
}

clang-format's avatar
clang-format committed
564
565
566
// Uses the largest transform size allowed by both the block size and the
// frame's tx_mode. For sizes below TX_32X32 on non-lossless segments, every
// transform type is tried and the one with the lowest RD cost is kept (a
// non-DCT type must beat DCT_DCT by the ext_tx_th factor). The outputs come
// from a final evaluation with the chosen type, with the cost of signalling
// that type added to *rate.
static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip, int64_t *sse,
                                   int64_t ref_best_rd, BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP10_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int r, s;
  int64_t d, psse, this_rd, best_rd = INT64_MAX;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
  int s0 = vp10_cost_bit(skip_prob, 0);
  int s1 = vp10_cost_bit(skip_prob, 1);
  const int is_inter = is_inter_block(mbmi);

  mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);

  if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
    for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
      double scale;

      mbmi->tx_type = tx_type;
      txfm_rd_in_plane(x, &r, &d, &s, &psse, ref_best_rd, 0, bs, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);
      // Candidate blew the RD budget.
      if (r == INT_MAX) continue;

      // Cost of signalling the transform type.
      if (is_inter) {
        r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
      } else {
        r += cpi->intra_tx_type_costs[mbmi->tx_size]
                                     [intra_mode_to_tx_type_context[mbmi->mode]]
                                     [mbmi->tx_type];
      }

      if (s) {
        this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
      } else {
        this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
        // Inter blocks may instead be signalled as skipped.
        if (is_inter && !xd->lossless[mbmi->segment_id])
          this_rd = VPXMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));
      }

      scale = (best_tx_type == DCT_DCT) ? ext_tx_th : 1;
      if (this_rd < scale * best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
      }
    }
  }

  mbmi->tx_type = best_tx_type;
  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);

  if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id] &&
      *rate != INT_MAX) {
    if (is_inter) {
      *rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
    } else {
      *rate += cpi->intra_tx_type_costs
                   [mbmi->tx_size][intra_mode_to_tx_type_context[mbmi->mode]]
                   [mbmi->tx_type];
    }
  }
}

clang-format's avatar
clang-format committed
621
622
623
static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
                                    int64_t *distortion, int *skip,
                                    int64_t *sse, int64_t ref_best_rd,
624
625
626
627
628
629
                                    BLOCK_SIZE bs) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = TX_4X4;

clang-format's avatar
clang-format committed
630
  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
631
632
633
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}

clang-format's avatar
clang-format committed
634
635
636
static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip,
                                   int64_t *psse, int64_t ref_best_rd,
Jingning Han's avatar
Jingning Han committed
637
638
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
Yaowu Xu's avatar
Yaowu Xu committed
639
  VP10_COMMON *const cm = &cpi->common;
Jingning Han's avatar
Jingning Han committed
640
641
642
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
643
644
645
  int r, s;
  int64_t d, sse;
  int64_t rd = INT64_MAX;
Jingning Han's avatar
Jingning Han committed
646
647
  int n, m;
  int s0, s1;
648
  int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
Jingning Han's avatar
Jingning Han committed
649
650
  TX_SIZE best_tx = max_tx_size;
  int start_tx, end_tx;
651
652
653
  const int tx_select = cm->tx_mode == TX_MODE_SELECT;
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  const int is_inter = is_inter_block(mbmi);
Jingning Han's avatar
Jingning Han committed
654
655
656
657
658
659

  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  s0 = vp10_cost_bit(skip_prob, 0);
  s1 = vp10_cost_bit(skip_prob, 1);

660
  if (tx_select) {
Jingning Han's avatar
Jingning Han committed
661
662
663
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
664
665
    const TX_SIZE chosen_tx_size =
        VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
Jingning Han's avatar
Jingning Han committed
666
667
668
669
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

670
  *distortion = INT64_MAX;
clang-format's avatar
clang-format committed
671
672
673
  *rate = INT_MAX;
  *skip = 0;
  *psse = INT64_MAX;
674
675
676
677
678

  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
    last_rd = INT64_MAX;
    for (n = start_tx; n >= end_tx; --n) {
      int r_tx_size = 0;
clang-format's avatar
clang-format committed
679
      for (m = 0; m <= n - (n == (int)max_tx_size); ++m) {
680
681
682
683
684
685
686
687
688
689
        if (m == n)
          r_tx_size += vp10_cost_zero(tx_probs[m]);
        else
          r_tx_size += vp10_cost_one(tx_probs[m]);
      }

      if (n >= TX_32X32 && tx_type != DCT_DCT) {
        continue;
      }
      mbmi->tx_type = tx_type;
clang-format's avatar
clang-format committed
690
      txfm_rd_in_plane(x, &r, &d, &s, &sse, ref_best_rd, 0, bs, n,
691
                       cpi->sf.use_fast_coef_costing);
clang-format's avatar
clang-format committed
692
      if (n < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
693
694
695
696
          r != INT_MAX) {
        if (is_inter)
          r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
        else
clang-format's avatar
clang-format committed
697
698
699
          r += cpi->intra_tx_type_costs
                   [mbmi->tx_size][intra_mode_to_tx_type_context[mbmi->mode]]
                   [mbmi->tx_type];
700
701
      }

clang-format's avatar
clang-format committed
702
      if (r == INT_MAX) continue;
703
704
705
706
707

      if (s) {
        if (is_inter) {
          rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
        } else {
clang-format's avatar
clang-format committed
708
          rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
709
        }
710
      } else {
711
        rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
712
      }
Jingning Han's avatar
Jingning Han committed
713

clang-format's avatar
clang-format committed
714
      if (tx_select && !(s && is_inter)) r += r_tx_size;
715

716
717
      if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
        rd = VPXMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));
Jingning Han's avatar
Jingning Han committed
718

719
720
      // Early termination in transform size search.
      if (cpi->sf.tx_size_search_breakout &&
clang-format's avatar
clang-format committed
721
722
          (rd == INT64_MAX || (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
           (n < (int)max_tx_size && rd > last_rd)))
723
724
725
726
        break;

      last_rd = rd;
      if (rd <
clang-format's avatar
clang-format committed
727
          (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) * best_rd) {
728
729
730
        best_tx = n;
        best_rd = rd;
        *distortion = d;
clang-format's avatar
clang-format committed
731
732
733
        *rate = r;
        *skip = s;
        *psse = sse;
734
735
        best_tx_type = mbmi->tx_type;
      }
Jingning Han's avatar
Jingning Han committed
736
737
738
    }
  }

739
740
  mbmi->tx_size = best_tx;
  mbmi->tx_type = best_tx_type;
clang-format's avatar
clang-format committed
741
742
  if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
  txfm_rd_in_plane(x, &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx,
743
                   cpi->sf.use_fast_coef_costing);
Jingning Han's avatar
Jingning Han committed
744
745
}

Yaowu Xu's avatar
Yaowu Xu committed
746
// Computes the luma rate/distortion for block size bs by dispatching to one
// of the transform size selection routines:
//   - choose_smallest_tx_size() when CONFIG_MISC_FIXES is set and
//     xd->lossless[0] is non-zero,
//   - choose_largest_tx_size() when the speed feature requests
//     USE_LARGESTALL or the block's segment is lossless,
//   - choose_tx_size_from_rd() otherwise.
// psse may be NULL, in which case the sse result is discarded.
static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip, int64_t *psse,
                            BLOCK_SIZE bs, int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t scratch_sse;
  int64_t *sse_out = psse;

  if (!sse_out) sse_out = &scratch_sse;

  assert(bs == xd->mi[0]->mbmi.sb_type);

  if (CONFIG_MISC_FIXES && xd->lossless[0]) {
    choose_smallest_tx_size(cpi, x, rate, distortion, skip, sse_out,
                            ref_best_rd, bs);
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
      xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out,
                           ref_best_rd, bs);
    return;
  }

  choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out, ref_best_rd,
                         bs);
}

// Returns 1 when the oblique mode `mode` should be skipped because the best
// intra mode found so far is not one of the two modes paired with it below;
// returns 0 for every other mode.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

clang-format's avatar
clang-format committed
785
786
787
788
789
static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, int row,
                                     int col, PREDICTION_MODE *best_mode,
                                     const int *bmode_costs, ENTROPY_CONTEXT *a,
                                     ENTROPY_CONTEXT *l, int *bestrate,
                                     int *bestratey, int64_t *bestdistortion,
Jingning Han's avatar
Jingning Han committed
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];
806
#if CONFIG_VPX_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
807
808
809
810
811
812
813
  uint16_t best_dst16[8 * 8];
#endif

  memcpy(ta, a, sizeof(ta));
  memcpy(tl, l, sizeof(tl));
  xd->mi[0]->mbmi.tx_size = TX_4X4;

814
#if CONFIG_VPX_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
815
816
817
818
819
820
821
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

clang-format's avatar
clang-format committed
822
      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
Jingning Han's avatar
Jingning Han committed
823
824
825
826

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
clang-format's avatar
clang-format committed
827
        if (conditional_skipintra(mode, *best_mode)) continue;
Jingning Han's avatar
Jingning Han committed
828
829
830
831
832
833
834
835
836
837
      }

      memcpy(tempa, ta, sizeof(ta));
      memcpy(templ, tl, sizeof(tl));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = (row + idy) * 2 + (col + idx);
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
clang-format's avatar
clang-format committed
838
839
          int16_t *const src_diff =
              vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
Jingning Han's avatar
Jingning Han committed
840
841
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0]->bmi[block].as_mode = mode;
clang-format's avatar
clang-format committed
842
843
844
845
          vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
                                   dst_stride, col + idx, row + idy, 0);
          vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
                                    dst_stride, xd->bd);
846
          if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui su's avatar
hui su committed
847
848
            TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
            const scan_order *so = get_scan(TX_4X4, tx_type);
849
            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han's avatar
Jingning Han committed
850
851
852
853
854
855
            vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
clang-format's avatar
clang-format committed
856
857
858
            vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
                                         dst_stride, p->eobs[block], xd->bd,
                                         DCT_DCT, 1);
Jingning Han's avatar
Jingning Han committed
859
860
          } else {
            int64_t unused;
hui su's avatar
hui su committed
861
862
            TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
            const scan_order *so = get_scan(TX_4X4, tx_type);
863
            vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han's avatar
Jingning Han committed
864
865
866
867
            vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                                 so->scan, so->neighbors,
                                 cpi->sf.use_fast_coef_costing);
clang-format's avatar
clang-format committed
868
869
870
871
            distortion +=
                vp10_highbd_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused, xd->bd) >>
                2;
Jingning Han's avatar
Jingning Han committed
872
873
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
clang-format's avatar
clang-format committed
874
875
876
            vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
                                         dst_stride, p->eobs[block], xd->bd,
                                         tx_type, 0);
Jingning Han's avatar
Jingning Han committed
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        memcpy(a, tempa, sizeof(tempa));
        memcpy(l, templ, sizeof(templ));
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
clang-format's avatar
clang-format committed
898
    next_highbd : {}
Jingning Han's avatar
Jingning Han committed
899
    }
clang-format's avatar
clang-format committed
900
    if (best_rd >= rd_thresh) return best_rd;
Jingning Han's avatar
Jingning Han committed
901
902
903

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
clang-format's avatar
clang-format committed
904
             best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
Jingning Han's avatar
Jingning Han committed
905
906
907
908
    }

    return best_rd;
  }
909
#endif  // CONFIG_VPX_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
910
911
912
913
914
915
916

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

clang-format's avatar
clang-format committed
917
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
Jingning Han's avatar
Jingning Han committed
918
919
920
921

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
clang-format's avatar
clang-format committed
922
      if (conditional_skipintra(mode, *best_mode)) continue;
Jingning Han's avatar
Jingning Han committed
923
924
925
926
927
928
929
930
931
932
933
934
935
936
    }

    memcpy(tempa, ta, sizeof(ta));
    memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = (row + idy) * 2 + (col + idx);
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff =
            vp10_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
clang-format's avatar
clang-format committed
937
938
        vp10_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
                                 dst_stride, col + idx, row + idy, 0);
Jingning Han's avatar
Jingning Han committed
939
940
        vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

941
        if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui su's avatar
hui su committed
942
943
          TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
          const scan_order *so = get_scan(TX_4X4, tx_type);
944
          vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
Jingning Han's avatar
Jingning Han committed
945
946
947
948
949
950
          vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
clang-format's avatar
clang-format committed
951
952
          vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
                                dst_stride, p->eobs[block], DCT_DCT, 1);
Jingning Han's avatar
Jingning Han committed
953
954
        } else {
          int64_t unused;
hui su's avatar
hui su committed
955
956
          TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
          const scan_order *so = get_scan(TX_4X4, tx_type);
957
          vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
Jingning Han's avatar
Jingning Han committed
958
959
          vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
clang-format's avatar
clang-format committed
960
961
962
963
964
965
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          distortion +=
              vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16,
                               &unused) >>
              2;
Jingning Han's avatar
Jingning Han committed
966
967
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
clang-format's avatar
clang-format committed
968
969
          vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
                                dst_stride, p->eobs[block], tx_type, 0);
Jingning Han's avatar
Jingning Han committed
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      memcpy(a, tempa, sizeof(tempa));
      memcpy(l, templ, sizeof(templ));
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
               num_4x4_blocks_wide * 4);
    }
clang-format's avatar
clang-format committed
989
  next : {}
Jingning Han's avatar
Jingning Han committed
990
991
  }

clang-format's avatar
clang-format committed
992
  if (best_rd >= rd_thresh) return best_rd;
Jingning Han's avatar
Jingning Han committed
993
994
995
996
997
998
999
1000

  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
           num_4x4_blocks_wide * 4);

  return best_rd;
}

Yaowu Xu's avatar
Yaowu Xu committed
1001
// Chooses an intra mode for every sub-block (4x4, 4x8 or 8x4) of an 8x8 luma
// block by calling rd_pick_intra4x4block() on each one.
//
// On success the summed rate, coefficient-only rate and distortion are
// written to *rate, *rate_y and *distortion, the per-block modes are stored
// in mic->bmi[], and the RD cost of the totals is returned. INT64_MAX is
// returned (outputs untouched) as soon as the accumulated cost can no longer
// stay below best_rd.
static int64_t rd_pick_intra_sub_8x8_y_mode(VP10_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *const above_mi = xd->above_mi;
  const MODE_INFO *const left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int blocks_high = num_4x4_blocks_high_lookup[bsize];
  const int *mode_costs = cpi->mbmode_cost;
  ENTROPY_CONTEXT t_above[4], t_left[4];
  int64_t rd_sum = 0;
  int64_t dist_sum = 0;
  int rate_sum = 0;
  int rate_y_sum = 0;
  int blk_row, blk_col;

  // Work on local copies of the entropy contexts.
  memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
  memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (blk_row = 0; blk_row < 2; blk_row += blocks_high) {
    for (blk_col = 0; blk_col < 2; blk_col += blocks_wide) {
      const int blk = blk_row * 2 + blk_col;
      const int64_t rd_budget = best_rd - rd_sum;
      PREDICTION_MODE best_mode = DC_PRED;
      int sub_rate = INT_MAX, sub_rate_y = INT_MAX;
      int64_t sub_dist = INT64_MAX;
      int64_t sub_rd;
      int k;

      // Key frames use mode costs conditioned on the above/left block modes.
      if (cpi->common.frame_type == KEY_FRAME) {
        const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, blk);
        const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, blk);

        mode_costs = cpi->y_mode_costs[A][L];
      }

      sub_rd = rd_pick_intra4x4block(cpi, mb, blk_row, blk_col, &best_mode,
                                     mode_costs, t_above + blk_col,
                                     t_left + blk_row, &sub_rate, &sub_rate_y,
                                     &sub_dist, bsize, rd_budget);
      if (sub_rd >= rd_budget) return INT64_MAX;

      rd_sum += sub_rd;
      rate_sum += sub_rate;
      dist_sum += sub_dist;
      rate_y_sum += sub_rate_y;

      // Replicate the chosen mode over every 4x4 unit the sub-block covers.
      mic->bmi[blk].as_mode = best_mode;
      for (k = 1; k < blocks_high; ++k)
        mic->bmi[blk + k * 2].as_mode = best_mode;
      for (k = 1; k < blocks_wide; ++k)
        mic->bmi[blk + k].as_mode = best_mode;

      if (rd_sum >= best_rd) return INT64_MAX;
    }
  }

  *rate = rate_sum;
  *rate_y = rate_y_sum;
  *distortion = dist_sum;
  mic->mbmi.mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, rate_sum, dist_sum);
}

// This function is used only for intra_only frames
clang-format's avatar
clang-format committed
1067
1068
1069
static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
                                      int *rate_tokenonly, int64_t *distortion,
                                      int *skippable, BLOCK_SIZE bsize,
Jingning Han's avatar
Jingning Han committed
1070
1071
1072
1073
1074
1075
1076
1077
                                      int64_t best_rd) {
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  int this_rate, this_rate_tokenonly, s;
  int64_t this_distortion, this_rd;
  TX_SIZE best_tx = TX_4X4;
1078
  TX_TYPE best_tx_type = DCT_DCT;
Jingning Han's avatar
Jingning Han committed
1079
1080
1081
1082
1083
1084
1085
1086
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
hui su's avatar
hui su committed
1087

Jingning Han's avatar
Jingning Han committed
1088
1089
1090
1091
  /* Y Search for intra prediction mode */
  for (mode = DC_PRED; mode <= TM_PRED; mode++) {
    mic->mbmi.mode = mode;

clang-format's avatar
clang-format committed
1092
1093
    super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
                    bsize, best_rd);
Jingning Han's avatar
Jingning Han committed
1094

clang-format's avatar
clang-format committed
1095
    if (this_rate_tokenonly == INT_MAX) continue;
Jingning Han's avatar
Jingning Han committed
1096
1097
1098
1099
1100

    this_rate = this_rate_tokenonly + bmode_costs[mode];
    this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);

    if (this_rd < best_rd) {
clang-format's avatar
clang-format committed
1101
1102
1103
1104
1105
      mode_selected = mode;
      best_rd = this_rd;
      best_tx = mic->mbmi.tx_size;
      best_tx_type = mic->mbmi.tx_type;
      *rate = this_rate;
Jingning Han's avatar
Jingning Han committed
1106
      *rate_tokenonly = this_rate_tokenonly;
clang-format's avatar
clang-format committed
1107
1108
      *distortion = this_distortion;
      *skippable = s;
Jingning Han's avatar
Jingning Han committed
1109
1110
1111
1112
1113
    }
  }

  mic->mbmi.mode = mode_selected;
  mic->mbmi.tx_size = best_tx;
1114
  mic->mbmi.tx_type = best_tx_type;
Jingning Han's avatar
Jingning Han committed
1115
1116
1117
1118
1119
1120

  return best_rd;
}

// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
clang-format's avatar
clang-format committed
1121
1122
1123
static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skippable, int64_t *sse,
                            BLOCK_SIZE bsize, int64_t ref_best_rd) {
Jingning Han's avatar
Jingning Han committed
1124
1125
1126
1127
1128
1129
1130
1131
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
  int plane;
  int pnrate = 0, pnskip = 1;
  int64_t pndist = 0, pnsse = 0;
  int is_cost_valid = 1;

clang-format's avatar
clang-format committed
1132
  if (ref_best_rd < 0) is_cost_valid = 0;
Jingning Han's avatar
Jingning Han committed
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145

  if (is_inter_block(mbmi) && is_cost_valid) {
    int plane;
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      vp10_subtract_plane(x, bsize, plane);
  }

  *rate = 0;
  *distortion = 0;
  *sse = 0;
  *skippable = 1;

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
clang-format's avatar
clang-format committed
1146
1147
    txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, plane,
                     bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
Jingning Han's avatar
Jingning Han committed
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
    if (pnrate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }
    *rate += pnrate;
    *distortion += pndist;
    *sse += pnsse;
    *skippable &= pnskip;
  }

  if (!is_cost_valid) {
    // reset cost value
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  }

  return is_cost_valid;
}

Yaowu Xu's avatar
Yaowu Xu committed
1169
static int64_t rd_pick_intra_sbuv_mode(VP10_COMP *cpi, MACROBLOCK *x,
clang-format's avatar
clang-format committed
1170
1171
1172
1173
                                       PICK_MODE_CONTEXT *ctx, int *rate,
                                       int *rate_tokenonly, int64_t *distortion,
                                       int *skippable, BLOCK_SIZE bsize,
                                       TX_SIZE max_tx_size) {
Jingning Han's avatar
Jingning Han committed
1174
1175
1176
1177
1178
1179
1180
1181
1182
  MACROBLOCKD *xd = &x->e_mbd;
  PREDICTION_MODE mode;
  PREDICTION_MODE mode_selected = DC_PRED;
  int64_t best_rd = INT64_MAX, this_rd;
  int this_rate_tokenonly, this_rate, s;
  int64_t this_distortion, this_sse;

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
clang-format's avatar
clang-format committed
1183
    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue;
Jingning Han's avatar