/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <math.h>

Adrian Grange's avatar
Adrian Grange committed
15
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
16
#include "./av1_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Adrian Grange's avatar
Adrian Grange committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Adrian Grange's avatar
Adrian Grange committed
20
#include "aom_mem/aom_mem.h"
Yaowu Xu's avatar
Yaowu Xu committed
21
22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

Yaowu Xu's avatar
Yaowu Xu committed
24
25
26
27
28
29
30
31
32
33
34
#include "av1/common/common.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
Jingning Han's avatar
Jingning Han committed
35

Jingning Han's avatar
Jingning Han committed
36
#include "av1/encoder/aq_variance.h"
Yaowu Xu's avatar
Yaowu Xu committed
37
38
39
40
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
41
#include "av1/encoder/hybrid_fwd_txfm.h"
Yaowu Xu's avatar
Yaowu Xu committed
42
43
44
45
46
#include "av1/encoder/mcomp.h"
#include "av1/encoder/quantize.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
Jingning Han's avatar
Jingning Han committed
47

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
// Reference-frame bit masks.
//
// <REF>_MODE_MASK has bit (1 << frame) set for every frame type in the
// current configuration EXCEPT <REF> itself.
// SECOND_REF_FRAME_MASK sets the ALTREF bit (plus the BWDREF bit when
// CONFIG_EXT_REFS is enabled) and bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME == 0 -- confirm.
#if CONFIG_EXT_REFS

#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))  // NOLINT
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))  // NOLINT
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))  // NOLINT
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))  // NOLINT
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))  // NOLINT
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))  // NOLINT

#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)

#else  // !CONFIG_EXT_REFS

#define LAST_FRAME_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << GOLDEN_FRAME))

#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)

#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
85

clang-format's avatar
clang-format committed
86
87
// Tuning constants for the mode search.
// NOTE(review): the code that consumes these is outside this excerpt; the
// names suggest an early-termination index threshold and a discount applied
// to NEWMV costs -- confirm against the users before relying on that.
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
88

89
90
// Threshold constant (0.99). Declared without `static`, so it has external
// linkage and is visible to other translation units.
// NOTE(review): its users are not visible here; presumably an RD ratio
// threshold for extended transform type selection -- confirm.
const double ext_tx_th = 0.99;

Jingning Han's avatar
Jingning Han committed
91
92
93
94
95
// One candidate of the mode search: a prediction mode together with its
// reference frame pair. This is the element type of av1_mode_order[].
typedef struct {
  PREDICTION_MODE mode;             // Intra or inter prediction mode.
  MV_REFERENCE_FRAME ref_frame[2];  // [0] first reference; [1] second
                                    // reference, NONE when unused.
} MODE_DEFINITION;

clang-format's avatar
clang-format committed
96
// A reference frame pair without an associated prediction mode. This is the
// element type of av1_ref_order[]; ref_frame[1] is NONE for single-reference
// and intra entries.
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
97
98
99
100
101
102
103
104
105
106
107
108

struct rdcost_block_args {
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
109
  const SCAN_ORDER *scan_order;
Jingning Han's avatar
Jingning Han committed
110
111
112
113
  uint8_t skippable;
};

#define LAST_NEW_MV_INDEX 6
114
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
clang-format's avatar
clang-format committed
115
  { NEARESTMV, { LAST_FRAME, NONE } },
116
117
118
119
120
#if CONFIG_EXT_REFS
  { NEARESTMV, { LAST2_FRAME, NONE } },
  { NEARESTMV, { LAST3_FRAME, NONE } },
  { NEARESTMV, { BWDREF_FRAME, NONE } },
#endif  // CONFIG_EXT_REFS
clang-format's avatar
clang-format committed
121
122
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
123

clang-format's avatar
clang-format committed
124
  { DC_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
125

clang-format's avatar
clang-format committed
126
  { NEWMV, { LAST_FRAME, NONE } },
127
128
129
130
131
#if CONFIG_EXT_REFS
  { NEWMV, { LAST2_FRAME, NONE } },
  { NEWMV, { LAST3_FRAME, NONE } },
  { NEWMV, { BWDREF_FRAME, NONE } },
#endif  // CONFIG_EXT_REFS
clang-format's avatar
clang-format committed
132
133
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
134

clang-format's avatar
clang-format committed
135
  { NEARMV, { LAST_FRAME, NONE } },
136
137
138
139
140
#if CONFIG_EXT_REFS
  { NEARMV, { LAST2_FRAME, NONE } },
  { NEARMV, { LAST3_FRAME, NONE } },
  { NEARMV, { BWDREF_FRAME, NONE } },
#endif  // CONFIG_EXT_REFS
clang-format's avatar
clang-format committed
141
142
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
143

clang-format's avatar
clang-format committed
144
  { ZEROMV, { LAST_FRAME, NONE } },
145
146
147
148
149
#if CONFIG_EXT_REFS
  { ZEROMV, { LAST2_FRAME, NONE } },
  { ZEROMV, { LAST3_FRAME, NONE } },
  { ZEROMV, { BWDREF_FRAME, NONE } },
#endif  // CONFIG_EXT_REFS
clang-format's avatar
clang-format committed
150
151
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
152

153
154
  // TODO(zoeliu): May need to reconsider the order on the modes to check

clang-format's avatar
clang-format committed
155
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
156
157
158
159
#if CONFIG_EXT_REFS
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
clang-format's avatar
clang-format committed
160
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
161
162
163
164
165
166
#if CONFIG_EXT_REFS
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
167

clang-format's avatar
clang-format committed
168
  { TM_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
169

clang-format's avatar
clang-format committed
170
171
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
172
173
174
175
176
177
#if CONFIG_EXT_REFS
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
clang-format's avatar
clang-format committed
178
179
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
Jingning Han's avatar
Jingning Han committed
180

181
182
183
184
185
186
187
188
189
190
191
#if CONFIG_EXT_REFS
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS

clang-format's avatar
clang-format committed
192
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
193
194
195
196
#if CONFIG_EXT_REFS
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
clang-format's avatar
clang-format committed
197
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
198
199
200
201
202
203
#if CONFIG_EXT_REFS
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
204

clang-format's avatar
clang-format committed
205
206
207
208
209
210
211
212
  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
213
214
};

215
static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
216
217
218
219
220
221
222
223
  { { LAST_FRAME, NONE } },
#if CONFIG_EXT_REFS
  { { LAST2_FRAME, NONE } },          { { LAST3_FRAME, NONE } },
#endif  // CONFIG_EXT_REFS
  { { GOLDEN_FRAME, NONE } },
#if CONFIG_EXT_REFS
  { { BWDREF_FRAME, NONE } },
#endif  // CONFIG_EXT_REFS
clang-format's avatar
clang-format committed
224
  { { ALTREF_FRAME, NONE } },         { { LAST_FRAME, ALTREF_FRAME } },
225
226
227
228
229
230
231
232
233
#if CONFIG_EXT_REFS
  { { LAST2_FRAME, ALTREF_FRAME } },  { { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
  { { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { { LAST_FRAME, BWDREF_FRAME } },   { { LAST2_FRAME, BWDREF_FRAME } },
  { { LAST3_FRAME, BWDREF_FRAME } },  { { GOLDEN_FRAME, BWDREF_FRAME } },
#endif  // CONFIG_EXT_REFS
  { { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
234
235
};

clang-format's avatar
clang-format committed
236
237
// Exchanges coefficient buffer slots m and n of ctx for every plane in
// [min_plane, max_plane). As a side effect the plane's working pointers
// (coeff, qcoeff, dqcoeff, eobs) are left pointing at the buffers that were in
// slot m before the call (they now live in slot n).
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n,
                           int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Each buffer kind is rotated independently, using the plane's own
    // pointer as the temporary.
    mb_plane->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = mb_plane->coeff;

    mb_plane->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = mb_plane->qcoeff;

    mbd_plane->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = mbd_plane->dqcoeff;

    mb_plane->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = mb_plane->eobs;
  }
}

261
262
// Estimates rate and distortion for a block from prediction-error statistics
// only (no forward transform or quantization is run).
//
// For every plane the block is split into units of the plane's largest
// transform size; the variance function for that unit size yields the
// variance and SSE of each unit.
//
// Outputs:
//   *out_rate_sum  sum over planes of the modeled rate.
//   *out_dist_sum  sum over planes of the modeled distortion, << 4.
//   *skip_txfm_sb  1 only if every unit of every plane passed the
//                  low-error test, otherwise 0.
//   *skip_sse_sb   total SSE over all planes, << 4.
// Side effects: writes x->pred_sse[ref] (luma SSE), x->bsse[] and
// x->skip_txfm[] for each (plane, unit).
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  unsigned int sum_sse = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int rate;
  int64_t dist;
  const int dequant_shift =
#if CONFIG_AOM_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif  // CONFIG_AOM_HIGHBITDEPTH
                                                    3;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = AOMMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = AOMMIN(80, ac_thr >> 2);
    // Number of units horizontally / vertically. The vertical count is
    // derived from the height tables on both sides (the previous code mixed
    // in the width table, which is only equivalent for square units).
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_height_log2_lookup[unit_size]);
    int idx, idy;
    // log2 of the unit width / height in pixels.
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

    sum_sse = 0;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        // Row offset scales with the unit height, column offset with the unit
        // width, for both source and prediction buffers.
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lw);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst, pd->dst.stride,
                                        &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        // A single unit failing the low-error test clears the flag for the
        // whole block.
        if (skip_flag && !low_err_skip) skip_flag = 0;

        if (i == 0) x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;

    // Fast approximate the modelling function.
    if (cpi->sf.simple_model_rd_from_var) {
      const int64_t square_error = sum_sse;
      const int quantizer = (pd->dequant[1] >> dequant_shift);
      const int64_t rate_temp =
          (quantizer < 120)
              ? (square_error * (280 - quantizer)) >> (16 - AV1_PROB_COST_SHIFT)
              : 0;
      assert(rate_temp == (int)rate_temp);
      rate = (int)rate_temp;
      dist = (square_error * quantizer) >> 8;
    } else {
      av1_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> dequant_shift, &rate,
                                   &dist);
    }
    rate_sum += rate;
    dist_sum += dist;
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}

370
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size entries, and stores the sum of squared coeff[] values in *ssz.
//
// All products are formed in 64 bits: a difference of two 16-bit-range
// coefficients can reach 65535, whose square does not fit a 32-bit int, so
// multiplying in `int` would be signed overflow.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - (int64_t)dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

385
// Returns the sum of squared differences between the 16-bit coefficient
// arrays coeff[] and dqcoeff[] over block_size entries (0 when block_size is
// not positive).
//
// The difference of two int16_t values can reach +/-65535 and its square
// exceeds INT_MAX, so the square is computed in 64 bits.
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - (int64_t)dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

398
399
#if CONFIG_AOM_HIGHBITDEPTH
// High-bitdepth variant of the block error: accumulates the squared
// coeff/dqcoeff differences and the squared coefficients in 64 bits, then
// scales both sums down by 2 * (bd - 8) bits with rounding. The scaled
// coefficient energy goes to *ssz, the scaled error is returned.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  // Half of the divisor, so that the right shift rounds to nearest.
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t err_acc = 0;
  int64_t energy_acc = 0;
  int k;

  for (k = 0; k < block_size; k++) {
    const int64_t delta = coeff[k] - dqcoeff[k];
    err_acc += delta * delta;
    energy_acc += (int64_t)coeff[k] * (int64_t)coeff[k];
  }
  assert(err_acc >= 0 && energy_acc >= 0);

  err_acc = (err_acc + rounding) >> shift;
  energy_acc = (energy_acc + rounding) >> shift;

  *ssz = energy_acc;
  return err_acc;
}
#endif  // CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
420
421
422
423
424
425
426

/* Number of coefficients in each band, per transform size. The first five
 * bands are fixed; the last band takes whatever remains of the block (e.g.
 * 16 - 13 for 4x4, since 1 + 2 + 3 + 4 + 3 = 13).
 *
 * The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * was non-zero). The eighth element of each row is not listed and is
 * therefore zero-initialized. */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4, 3, 16 - 13, 0 },     /* 4x4: 16 coefficients     */
  { 1, 2, 3, 4, 11, 64 - 21, 0 },    /* 8x8: 64 coefficients     */
  { 1, 2, 3, 4, 11, 256 - 21, 0 },   /* 16x16: 256 coefficients  */
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },  /* 32x32: 1024 coefficients */
};
clang-format's avatar
clang-format committed
432
433
434
// Returns the token cost of the quantized coefficients of one transform
// block, looked up in x->token_costs.
//
//   A, L   above/left entropy contexts for this block. They seed the context
//          of the first token and are both overwritten with 1 if the block
//          has any coefficient (eob > 0), else 0.
//   scan   scan order table; nb is its neighbor table, used to derive the
//          context of each later token.
//   use_fast_coef_costing
//          when non-zero, the context is approximated by !previous_token
//          instead of being derived from the neighbors.
static int cost_coeffs(MACROBLOCK *x, int plane, int block, ENTROPY_CONTEXT *A,
                       ENTROPY_CONTEXT *L, TX_SIZE tx_size, const int16_t *scan,
                       const int16_t *nb, int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Start at index 1: the DC coefficient is costed separately below.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  const int first_ctx = combine_entropy_contexts(*A, *L);
  int band_left;
  int level;
  int16_t prev_tok;
  EXTRABIT extra;
  int c, cost;
#if CONFIG_AOM_HIGHBITDEPTH
  const int *cat6_high_cost = av1_get_high_cost_table(xd->bd);
#else
  const int *cat6_high_cost = av1_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);

  if (eob == 0) {
    // Empty block: a single EOB token, and the contexts become 0.
    *A = *L = 0;
    return (*token_costs)[0][first_ctx][EOB_TOKEN];
  }

  band_left = *band_count++;

  // DC token.
  level = qcoeff[0];
  av1_get_token_extra(level, &prev_tok, &extra);
  cost = (*token_costs)[0][first_ctx][prev_tok] +
         av1_get_cost(prev_tok, extra, cat6_high_cost);
  token_cache[0] = av1_pt_energy_class[prev_tok];
  ++token_costs;

  // AC tokens, in scan order.
  for (c = 1; c < eob; c++) {
    const int rc = scan[c];
    int16_t tok;
    int ctx;

    level = qcoeff[rc];
    av1_get_token_extra(level, &tok, &extra);

    if (use_fast_coef_costing) {
      ctx = !prev_tok;
    } else {
      ctx = get_coef_context(nb, token_cache, c);
      token_cache[rc] = av1_pt_energy_class[tok];
    }
    cost += (*token_costs)[!prev_tok][ctx][tok] +
            av1_get_cost(tok, extra, cat6_high_cost);

    prev_tok = tok;

    // Move on to the next band (and its cost table) once this one is used up.
    --band_left;
    if (band_left == 0) {
      band_left = *band_count++;
      ++token_costs;
    }
  }

  // EOB token; band_left is 0 only when the terminator entry was reached,
  // i.e. the block is full and no EOB is coded.
  if (band_left) {
    const int eob_ctx =
        use_fast_coef_costing ? !prev_tok : get_coef_context(nb, token_cache, c);
    cost += (*token_costs)[0][eob_ctx][EOB_TOKEN];
  }

  // At least one coefficient was coded on this path.
  *A = *L = 1;

  return cost;
}

// Computes the transform-domain distortion and SSE of one transform block
// from its coefficient and dequantized-coefficient buffers.
//
//   *out_dist  block error between coeff and dqcoeff, >> shift.
//   *out_sse   sum of squared coefficients, >> shift.
// shift is 0 for TX_32X32 and 2 for every other transform size.
static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  // 16 coefficients for tx_size 0, four times as many per size step.
  const int num_coeffs = 16 << (tx_size << 1);
  const int shift = tx_size == TX_32X32 ? 0 : 2;
  int64_t coeff_energy;
  int64_t block_err;
#if CONFIG_AOM_HIGHBITDEPTH
  const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;

  block_err =
      av1_highbd_block_error(coeff, dqcoeff, num_coeffs, &coeff_energy, bd);
#else
  block_err = av1_block_error(coeff, dqcoeff, num_coeffs, &coeff_energy);
#endif  // CONFIG_AOM_HIGHBITDEPTH

  *out_dist = block_err >> shift;
  *out_sse = coeff_energy >> shift;
}

538
static int rate_block(int plane, int block, int blk_row, int blk_col,
clang-format's avatar
clang-format committed
539
                      TX_SIZE tx_size, struct rdcost_block_args *args) {
540
  return cost_coeffs(args->x, plane, block, args->t_above + blk_col,
541
542
                     args->t_left + blk_row, tx_size, args->scan_order->scan,
                     args->scan_order->neighbors, args->use_fast_coef_costing);
Jingning Han's avatar
Jingning Han committed
543
544
}

545
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
clang-format's avatar
clang-format committed
546
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
Jingning Han's avatar
Jingning Han committed
547
548
549
550
551
552
553
554
555
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;

clang-format's avatar
clang-format committed
556
  if (args->exit_early) return;
Jingning Han's avatar
Jingning Han committed
557
558

  if (!is_inter_block(mbmi)) {
559
    struct encode_b_args b_args = { x, NULL, &mbmi->skip };
clang-format's avatar
clang-format committed
560
    av1_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
561
                           &b_args);
Jingning Han's avatar
Jingning Han committed
562
563
564
565
566
    dist_block(x, plane, block, tx_size, &dist, &sse);
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
567
      av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
Jingning Han's avatar
Jingning Han committed
568
569
570
571
      dist_block(x, plane, block, tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
clang-format's avatar
clang-format committed
572
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
Jingning Han's avatar
Jingning Han committed
573
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
574
      av1_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize,
clang-format's avatar
clang-format committed
575
                         tx_size);
clang-format's avatar
clang-format committed
576
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
Jingning Han's avatar
Jingning Han committed
577
578
579
580
581
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
582
#if CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
583
584
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
clang-format's avatar
clang-format committed
585
        if (tx_size != TX_32X32) dc_correct >>= 2;
Jingning Han's avatar
Jingning Han committed
586

Adrian Grange's avatar
Adrian Grange committed
587
        dist = AOMMAX(0, sse - dc_correct);
Jingning Han's avatar
Jingning Han committed
588
589
590
591
592
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
clang-format's avatar
clang-format committed
593
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
Jingning Han's avatar
Jingning Han committed
594
595
596
597
      dist = sse;
    }
  } else {
    // full forward transform and quantization
598
    av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
Jingning Han's avatar
Jingning Han committed
599
600
601
602
603
604
605
606
607
    dist_block(x, plane, block, tx_size, &dist, &sse);
  }

  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

608
  rate = rate_block(plane, block, blk_row, blk_col, tx_size, args);
Jingning Han's avatar
Jingning Han committed
609
610
611
612
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
Adrian Grange's avatar
Adrian Grange committed
613
  rd = AOMMIN(rd1, rd2);
Jingning Han's avatar
Jingning Han committed
614
  if (plane == 0)
clang-format's avatar
clang-format committed
615
616
    x->zcoeff_blk[tx_size][block] =
        !x->plane[plane].eobs[block] ||
617
        (rd1 > rd2 && !xd->lossless[mbmi->segment_id]);
Jingning Han's avatar
Jingning Han committed
618
619
620
621
622
623
624
625
626
627
628
629
630
631

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}

clang-format's avatar
clang-format committed
632
633
634
// Accumulates rate, distortion and SSE over every transform block of one
// plane, using block_rd_txfm as the per-block callback.
//
//   x                      encoder macroblock state
//   rate, distortion, sse  outputs: totals over the plane
//   skippable              output: stays 1 only if no visited block produced
//                          coefficients (eob == 0 for every block)
//   ref_best_rd            RD budget; the per-block callback flags an early
//                          exit once the running cost exceeds it
//   plane, bsize, tx_size  plane index, block size and transform size to use
//   use_fast_coef_casting  forwarded to args.use_fast_coef_costing
//
// When the walk exits early the outputs are set to sentinel values:
// *rate = INT_MAX, *distortion = *sse = INT64_MAX, *skippable = 0.
static void txfm_rd_in_plane(MACROBLOCK *x, int *rate, int64_t *distortion,
                             int *skippable, int64_t *sse, int64_t ref_best_rd,
                             int plane, BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  TX_TYPE tx_type;
  struct rdcost_block_args args;

  // Start from an all-zero accumulator, then fill in the search parameters.
  av1_zero(args);
  args.x = x;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  args.skippable = 1;

  // Only the luma plane records the transform size in the mode info.
  if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;

  av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  // The scan order is derived once, from the transform type of block 0.
  tx_type = get_tx_type(pd->plane_type, xd, 0);
  args.scan_order = get_scan(tx_size, tx_type);

  av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                         &args);

  if (args.exit_early) {
    // Budget exceeded: report "no valid result" sentinels.
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
  } else {
    *distortion = args.this_dist;
    *rate = args.this_rate;
    *sse = args.this_sse;
    *skippable = args.skippable;
  }
}

668
669
670
671
// Evaluates the luma plane with the largest transform size permitted by both
// the block size and the frame's tx_mode, picking the transform type by RD
// cost when a choice is available.
//
// The transform-type search runs only when the selected size is below
// TX_32X32 and the segment is not lossless; otherwise DCT_DCT is used.
// On return mbmi->tx_size and mbmi->tx_type hold the chosen configuration and
// *rate / *distortion / *skip / *sse hold its txfm_rd_in_plane() results
// (sentinel values if that evaluation exceeded ref_best_rd). The transform
// type signalling cost is added to *rate when a type search was possible.
static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion, int *skip,
                                   int64_t *sse, int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  const AV1_COMMON *const cm = &cpi->common;
  const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int r, s;
  int64_t d, psse, this_rd, best_rd = INT64_MAX;
  aom_prob skip_prob = av1_get_skip_prob(cm, xd);
  // Costs of coding the skip bit as 0 and as 1.
  int s0 = av1_cost_bit(skip_prob, 0);
  int s1 = av1_cost_bit(skip_prob, 1);
  const int is_inter = is_inter_block(mbmi);

  mbmi->tx_size = AOMMIN(max_tx_size, largest_tx_size);
  if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
    for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
      mbmi->tx_type = tx_type;
      txfm_rd_in_plane(x, &r, &d, &s, &psse, ref_best_rd, 0, bs, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);
      // INT_MAX marks a candidate that exceeded the RD budget.
      if (r == INT_MAX) continue;

      // Add the cost of signalling this transform type.
      if (is_inter)
        r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
      else
        r += cpi->intra_tx_type_costs[mbmi->tx_size]
                                     [intra_mode_to_tx_type_context[mbmi->mode]]
                                     [mbmi->tx_type];

      // A skippable block is costed as skip bit + SSE; otherwise as
      // coefficient rate + skip bit and the reconstructed distortion.
      if (s)
        this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
      else
        this_rd = RDCOST(x->rdmult, x->rddiv, r + s0, d);
      // Inter blocks may instead skip the residual if that is cheaper.
      if (is_inter && !xd->lossless[mbmi->segment_id] && !s)
        this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, psse));

      // While the incumbent is DCT_DCT the threshold is best_rd scaled by
      // ext_tx_th (defined elsewhere in this file); otherwise it is best_rd.
      if (this_rd < ((best_tx_type == DCT_DCT) ? ext_tx_th : 1) * best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
      }
    }
  }

  // Re-run with the winning type so the caller's outputs reflect it.
  mbmi->tx_type = best_tx_type;
  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
  if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id] &&
      *rate != INT_MAX) {
    if (is_inter)
      *rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
    else
      *rate += cpi->intra_tx_type_costs
                   [mbmi->tx_size][intra_mode_to_tx_type_context[mbmi->mode]]
                   [mbmi->tx_type];
  }
}

726
727
static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int64_t *distortion, int *skip,
clang-format's avatar
clang-format committed
728
                                    int64_t *sse, int64_t ref_best_rd,
729
730
731
732
733
                                    BLOCK_SIZE bs) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  mbmi->tx_size = TX_4X4;
734
  mbmi->tx_type = DCT_DCT;
clang-format's avatar
clang-format committed
735
  txfm_rd_in_plane(x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
736
737
738
                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
}

739
740
static void choose_tx_size_from_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                                   int *rate, int64_t *distortion, int *skip,
clang-format's avatar
clang-format committed
741
                                   int64_t *psse, int64_t ref_best_rd,
Jingning Han's avatar
Jingning Han committed
742
743
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
744
  const AV1_COMMON *const cm = &cpi->common;
Jingning Han's avatar
Jingning Han committed
745
746
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
747
  aom_prob skip_prob = av1_get_skip_prob(cm, xd);
748
749
750
  int r, s;
  int64_t d, sse;
  int64_t rd = INT64_MAX;
Jingning Han's avatar
Jingning Han committed
751
752
  int n, m;
  int s0, s1;
753
  int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
754
  TX_SIZE best_tx = TX_SIZES;
Jingning Han's avatar
Jingning Han committed
755
  int start_tx, end_tx;
756
757
758
  const int tx_select = cm->tx_mode == TX_MODE_SELECT;
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  const int is_inter = is_inter_block(mbmi);
759
760
  uint8_t zcoeff_blk[TX_SIZES][256];
  int num_4x4_blks = 1 << (num_pels_log2_lookup[bs] - 4);
Adrian Grange's avatar
Adrian Grange committed
761
  const aom_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
762

Jingning Han's avatar
Jingning Han committed
763
  assert(skip_prob > 0);
764
765
  s0 = av1_cost_bit(skip_prob, 0);
  s1 = av1_cost_bit(skip_prob, 1);
Jingning Han's avatar
Jingning Han committed
766

767
  if (tx_select) {
Jingning Han's avatar
Jingning Han committed
768
    start_tx = max_tx_size;
769
    end_tx = (max_tx_size == TX_32X32) ? TX_8X8 : TX_4X4;
Jingning Han's avatar
Jingning Han committed
770
  } else {
771
    const TX_SIZE chosen_tx_size =
Adrian Grange's avatar
Adrian Grange committed
772
        AOMMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
Jingning Han's avatar
Jingning Han committed
773
774
775
776
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

777
  *distortion = INT64_MAX;
clang-format's avatar
clang-format committed
778
779
780
  *rate = INT_MAX;
  *skip = 0;
  *psse = INT64_MAX;
781
782

  for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
783
#if CONFIG_REF_MV
clang-format's avatar
clang-format committed
784
    if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
785
786
#endif

787
788
789
    last_rd = INT64_MAX;
    for (n = start_tx; n >= end_tx; --n) {
      int r_tx_size = 0;
clang-format's avatar
clang-format committed
790
      for (m = 0; m <= n - (n == (int)max_tx_size); ++m) {
791
        if (m == n)
792
          r_tx_size += av1_cost_zero(tx_probs[m]);
793
        else
794
          r_tx_size += av1_cost_one(tx_probs[m]);
795
796
797
798
799
800
      }

      if (n >= TX_32X32 && tx_type != DCT_DCT) {
        continue;
      }
      mbmi->tx_type = tx_type;
clang-format's avatar
clang-format committed
801
      txfm_rd_in_plane(x, &r, &d, &s, &sse, ref_best_rd, 0, bs, n,
802
                       cpi->sf.use_fast_coef_costing);
clang-format's avatar
clang-format committed
803
      if (n < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
804
805
806
807
          r != INT_MAX) {
        if (is_inter)
          r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
        else
clang-format's avatar
clang-format committed
808
809
810
          r += cpi->intra_tx_type_costs
                   [mbmi->tx_size][intra_mode_to_tx_type_context[mbmi->mode]]
                   [mbmi->tx_type];
811
812
      }

clang-format's avatar
clang-format committed
813
      if (r == INT_MAX) continue;
814
815
816
817
818

      if (s) {
        if (is_inter) {
          rd = RDCOST(x->rdmult, x->rddiv, s1, sse);
        } else {
clang-format's avatar
clang-format committed
819
          rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select, sse);
820
        }
821
      } else {
822
        rd = RDCOST(x->rdmult, x->rddiv, r + s0 + r_tx_size * tx_select, d);
823
      }
Jingning Han's avatar
Jingning Han committed
824

clang-format's avatar
clang-format committed
825
      if (tx_select && !(s && is_inter)) r += r_tx_size;
826

827
      if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] && !s)
Adrian Grange's avatar
Adrian Grange committed
828
        rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, sse));
Jingning Han's avatar
Jingning Han committed
829

830
831
      // Early termination in transform size search.
      if (cpi->sf.tx_size_search_breakout &&
clang-format's avatar
clang-format committed
832
833
          (rd == INT64_MAX || (s == 1 && tx_type != DCT_DCT && n < start_tx) ||
           (n < (int)max_tx_size && rd > last_rd)))
834
835
836
837
        break;

      last_rd = rd;
      if (rd <
clang-format's avatar
clang-format committed
838
          (is_inter && best_tx_type == DCT_DCT ? ext_tx_th : 1) * best_rd) {
839
840
841
        best_tx = n;
        best_rd = rd;
        *distortion = d;
clang-format's avatar
clang-format committed
842
843
844
        *rate = r;
        *skip = s;
        *psse = sse;
845
        best_tx_type = mbmi->tx_type;
846
        memcpy(zcoeff_blk, x->zcoeff_blk[n], num_4x4_blks);
847
      }
Jingning Han's avatar
Jingning Han committed
848
849
850
    }
  }

851
852
  mbmi->tx_size = best_tx;
  mbmi->tx_type = best_tx_type;
853

clang-format's avatar
clang-format committed
854
  if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
855
856
857

  if (best_tx < TX_SIZES)
    memcpy(x->zcoeff_blk[best_tx], zcoeff_blk, num_4x4_blks);
Jingning Han's avatar
Jingning Han committed
858
859
}

860
// Luma rate-distortion entry point: dispatches to one of the three transform
// size selection strategies.
//
//   - CONFIG_MISC_FIXES set and segment 0 lossless -> choose_smallest_tx_size
//   - USE_LARGESTALL search method, or the block's own segment lossless
//                                                  -> choose_largest_tx_size
//   - otherwise                                    -> choose_tx_size_from_rd
//
// psse may be NULL; the SSE is then written to a local that is discarded.
static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip, int64_t *psse,
                            BLOCK_SIZE bs, int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t sse;
  // The helpers always write an SSE, so give them somewhere to put it.
  int64_t *ret_sse = psse ? psse : &sse;

  assert(bs == xd->mi[0]->mbmi.sb_type);

  if (CONFIG_MISC_FIXES && xd->lossless[0]) {
    choose_smallest_tx_size(cpi, x, rate, distortion, skip, ret_sse,
                            ref_best_rd, bs);
  } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
             xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  } else {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
                           bs);
  }
}

// Returns 1 when the oblique intra mode `mode` should be skipped given the
// best intra mode found so far, 0 otherwise.
//
// Each of the four oblique modes below is kept only when best_intra_mode is
// one of the two modes listed for it; every other `mode` is never skipped.
//
//   D117_PRED: kept for V_PRED or D135_PRED
//   D63_PRED:  kept for V_PRED or D45_PRED
//   D207_PRED: kept for H_PRED or D45_PRED
//   D153_PRED: kept for H_PRED or D135_PRED
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default:
      return 0;
  }
}

899
900
901
static int64_t rd_pick_intra4x4block(const AV1_COMP *const cpi, MACROBLOCK *x,
                                     int row, int col,
                                     PREDICTION_MODE *best_mode,
clang-format's avatar
clang-format committed
902
903
904
                                     const int *bmode_costs, ENTROPY_CONTEXT *a,
                                     ENTROPY_CONTEXT *l, int *bestrate,
                                     int *bestratey, int64_t *bestdistortion,
Jingning Han's avatar
Jingning Han committed
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  uint8_t best_dst[8 * 8];
921
#if CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
922
923
924
  uint16_t best_dst16[8 * 8];
#endif

925
926
  memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
  memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
Jingning Han's avatar
Jingning Han committed
927
928
  xd->mi[0]->mbmi.tx_size = TX_4X4;

929
#if CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
930
931
932
933
934
935
936
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

clang-format's avatar
clang-format committed
937
      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
Jingning Han's avatar
Jingning Han committed
938
939
940
941

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
clang-format's avatar
clang-format committed
942
        if (conditional_skipintra(mode, *best_mode)) continue;
Jingning Han's avatar
Jingning Han committed
943
944
      }

945
946
      memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
      memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
Jingning Han's avatar
Jingning Han committed
947
948
949
950
951
952

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = (row + idy) * 2 + (col + idx);
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
clang-format's avatar
clang-format committed
953
          int16_t *const src_diff =
954
              av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
Jingning Han's avatar
Jingning Han committed
955
956
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0]->bmi[block].as_mode = mode;
957
          av1_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
clang-format's avatar
clang-format committed
958
                                  dst_stride, col + idx, row + idy, 0);
Adrian Grange's avatar
Adrian Grange committed
959
          aom_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
clang-format's avatar
clang-format committed
960
                                    dst_stride, xd->bd);
961
          if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
hui su's avatar
hui su committed
962
            TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
963
            const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type);
964
            av1_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1);
965
966
            av1_regular_quantize_b_4x4(x, 0, block, scan_order->scan,
                                       scan_order->iscan);
Jingning Han's avatar
Jingning Han committed
967
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
968
                                 scan_order->scan, scan_order->neighbors,
Jingning Han's avatar
Jingning Han committed
969
970
971
                                 cpi->sf.use_fast_coef_costing);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
972
            av1_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
clang-format's avatar
clang-format committed
973
974
                                        dst_stride, p->eobs[block], xd->bd,
                                        DCT_DCT, 1);
Jingning Han's avatar
Jingning Han committed
975
976
          } else {
            int64_t unused;
hui su's avatar
hui su committed
977
            TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
978
            const SCAN_ORDER *scan_order = get_scan(TX_4X4, tx_type);
979
            av1_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0);
980
981
            av1_regular_quantize_b_4x4(x, 0, block, scan_order->scan,
                                       scan_order->iscan);
Jingning Han's avatar
Jingning Han committed
982
            ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
983
                                 scan_order->scan, scan_order->neighbors,
Jingning Han's avatar
Jingning Han committed
984
                                 cpi->sf.use_fast_coef_costing);
clang-format's avatar
clang-format committed
985
            distortion +=
986
                av1_highbd_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
clang-format's avatar
clang-format committed
987
                                       16, &unused, xd->bd) >>
clang-format's avatar
clang-format committed
988
                2;
Jingning Han's avatar
Jingning Han committed
989
990
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
991
            av1_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst,
clang-format's avatar
clang-format committed
992
993
                                        dst_stride, p->eobs[block], xd->bd,
                                        tx_type, 0);
Jingning Han's avatar
Jingning Han committed
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
1007
1008
        memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
        memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
Jingning Han's avatar
Jingning Han committed
1009
1010
1011
1012
1013
1014
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
clang-format's avatar
clang-format committed
1015
    next_highbd : {}
Jingning Han's avatar
Jingning Han committed
1016
    }
clang-format's avatar
clang-format committed
1017
    if (best_rd >= rd_thresh) return best_rd;
Jingning Han's avatar
Jingning Han committed
1018
1019
1020

    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
clang-format's avatar
clang-format committed
1021
             best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
Jingning Han's avatar
Jingning Han committed
1022
1023
1024
1025
    }

    return best_rd;
  }
1026
#endif  // CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
1027
1028
1029
1030
1031
1032
1033

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

clang-format's avatar
clang-format committed
1034
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
Jingning Han's avatar
Jingning Han committed
1035
1036
1037
1038

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {