rdopt.c 399 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13 14
 */

#include <assert.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
15 16
#include "./av1_rtcd.h"
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_mem/aom_mem.h"
21 22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

24 25 26 27 28 29 30 31 32 33 34 35
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
Jingning Han's avatar
Jingning Han committed
36

37 38 39 40 41 42 43 44 45 46 47 48
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/palette.h"
#include "av1/encoder/quantize.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/aq_variance.h"
Jingning Han's avatar
Jingning Han committed
49

50 51 52
#if CONFIG_DUAL_FILTER
#if CONFIG_EXT_INTERP
static const int filter_sets[25][2] = {
53 54 55 56
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 0, 4 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 1, 4 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 2, 4 }, { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }, { 3, 4 }, { 4, 0 },
  { 4, 1 }, { 4, 2 }, { 4, 3 }, { 4, 4 },
57 58 59
};
#else
static const int filter_sets[9][2] = {
60 61
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
62 63 64 65
};
#endif
#endif

66 67
#if CONFIG_EXT_REFS

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
Jingning Han's avatar
Jingning Han committed
95

96 97
#endif  // CONFIG_EXT_REFS

98
#if CONFIG_EXT_REFS
99
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
100
#else
101
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
102
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
103

104 105
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
106

107 108 109 110 111 112
#if CONFIG_EXT_INTRA
#define ANGLE_FAST_SEARCH 1
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

113 114
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
115

Jingning Han's avatar
Jingning Han committed
116 117 118 119 120
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

121
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
122 123

struct rdcost_block_args {
Yaowu Xu's avatar
Yaowu Xu committed
124
  const AV1_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
125
  MACROBLOCK *x;
126 127
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
Jingning Han's avatar
Jingning Han committed
128 129 130 131 132 133 134 135 136 137 138 139
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;
};

#define LAST_NEW_MV_INDEX 6
Yaowu Xu's avatar
Yaowu Xu committed
140
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
141
  { NEARESTMV, { LAST_FRAME, NONE } },
142
#if CONFIG_EXT_REFS
143 144 145
  { NEARESTMV, { LAST2_FRAME, NONE } },
  { NEARESTMV, { LAST3_FRAME, NONE } },
  { NEARESTMV, { BWDREF_FRAME, NONE } },
146
#endif  // CONFIG_EXT_REFS
147 148
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
149

150
  { DC_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
151

152
  { NEWMV, { LAST_FRAME, NONE } },
153
#if CONFIG_EXT_REFS
154 155 156
  { NEWMV, { LAST2_FRAME, NONE } },
  { NEWMV, { LAST3_FRAME, NONE } },
  { NEWMV, { BWDREF_FRAME, NONE } },
157
#endif  // CONFIG_EXT_REFS
158 159
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
160

161
  { NEARMV, { LAST_FRAME, NONE } },
162
#if CONFIG_EXT_REFS
163 164 165
  { NEARMV, { LAST2_FRAME, NONE } },
  { NEARMV, { LAST3_FRAME, NONE } },
  { NEARMV, { BWDREF_FRAME, NONE } },
166
#endif  // CONFIG_EXT_REFS
167 168
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
169

Yue Chen's avatar
Yue Chen committed
170
#if CONFIG_EXT_INTER
171
  { NEWFROMNEARMV, { LAST_FRAME, NONE } },
172
#if CONFIG_EXT_REFS
173 174 175
  { NEWFROMNEARMV, { LAST2_FRAME, NONE } },
  { NEWFROMNEARMV, { LAST3_FRAME, NONE } },
  { NEWFROMNEARMV, { BWDREF_FRAME, NONE } },
176
#endif  // CONFIG_EXT_REFS
177 178
  { NEWFROMNEARMV, { ALTREF_FRAME, NONE } },
  { NEWFROMNEARMV, { GOLDEN_FRAME, NONE } },
Yue Chen's avatar
Yue Chen committed
179 180
#endif  // CONFIG_EXT_INTER

181
  { ZEROMV, { LAST_FRAME, NONE } },
182
#if CONFIG_EXT_REFS
183 184 185
  { ZEROMV, { LAST2_FRAME, NONE } },
  { ZEROMV, { LAST3_FRAME, NONE } },
  { ZEROMV, { BWDREF_FRAME, NONE } },
186
#endif  // CONFIG_EXT_REFS
187 188
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
189

190
// TODO(zoeliu): May need to reconsider the order on the modes to check
191

192
#if CONFIG_EXT_INTER
193
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
194
#if CONFIG_EXT_REFS
195 196
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
197
#endif  // CONFIG_EXT_REFS
198
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
199
#if CONFIG_EXT_REFS
200 201 202 203
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
204
#endif  // CONFIG_EXT_REFS
205

206
#else  // CONFIG_EXT_INTER
207

208
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
209
#if CONFIG_EXT_REFS
210 211
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
212
#endif  // CONFIG_EXT_REFS
213
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
214
#if CONFIG_EXT_REFS
215 216 217 218
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
219
#endif  // CONFIG_EXT_REFS
220
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
221

222
  { TM_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
223

224
#if CONFIG_EXT_INTER
225 226 227 228 229 230 231 232 233
  { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
234

235
#if CONFIG_EXT_REFS
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
  { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
255
#endif  // CONFIG_EXT_REFS
256

257 258 259 260 261 262 263 264 265
  { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
266 267

#if CONFIG_EXT_REFS
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
  { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
307
#endif  // CONFIG_EXT_REFS
308 309 310

#else  // CONFIG_EXT_INTER

311 312
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
313
#if CONFIG_EXT_REFS
314 315 316 317
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
318
#endif  // CONFIG_EXT_REFS
319 320
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
321 322

#if CONFIG_EXT_REFS
323 324 325 326 327 328 329 330
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
331
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
332

333
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
334
#if CONFIG_EXT_REFS
335 336
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
337
#endif  // CONFIG_EXT_REFS
338
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
339 340

#if CONFIG_EXT_REFS
341 342 343 344
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
345
#endif  // CONFIG_EXT_REFS
346

347
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
348

349 350 351 352 353 354 355 356
  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
357 358

#if CONFIG_EXT_INTER
359 360 361 362
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
363 364

#if CONFIG_EXT_REFS
365 366 367 368 369 370 371 372 373
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
374 375
#endif  // CONFIG_EXT_REFS

376 377 378 379
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
380

381
#if CONFIG_EXT_REFS
382 383 384 385
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
386 387
#endif  // CONFIG_EXT_REFS

388 389 390 391
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
392
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
393 394
};

Yaowu Xu's avatar
Yaowu Xu committed
395
static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
396
  { { LAST_FRAME, NONE } },
397
#if CONFIG_EXT_REFS
398 399
  { { LAST2_FRAME, NONE } },          { { LAST3_FRAME, NONE } },
  { { BWDREF_FRAME, NONE } },
400
#endif  // CONFIG_EXT_REFS
401
  { { GOLDEN_FRAME, NONE } },         { { ALTREF_FRAME, NONE } },
402

403
  { { LAST_FRAME, ALTREF_FRAME } },
404
#if CONFIG_EXT_REFS
405
  { { LAST2_FRAME, ALTREF_FRAME } },  { { LAST3_FRAME, ALTREF_FRAME } },
406
#endif  // CONFIG_EXT_REFS
407
  { { GOLDEN_FRAME, ALTREF_FRAME } },
408 409

#if CONFIG_EXT_REFS
410 411
  { { LAST_FRAME, BWDREF_FRAME } },   { { LAST2_FRAME, BWDREF_FRAME } },
  { { LAST3_FRAME, BWDREF_FRAME } },  { { GOLDEN_FRAME, BWDREF_FRAME } },
412
#endif  // CONFIG_EXT_REFS
413

414
  { { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
415 416
};

hui su's avatar
hui su committed
417 418
static INLINE int write_uniform_cost(int n, int v) {
  int l = get_unsigned_bits(n), m = (1 << l) - n;
419
  if (l == 0) return 0;
hui su's avatar
hui su committed
420
  if (v < m)
Yaowu Xu's avatar
Yaowu Xu committed
421
    return (l - 1) * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
422
  else
Yaowu Xu's avatar
Yaowu Xu committed
423
    return l * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
424 425
}

426 427 428
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
429 430 431 432
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
433
  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
434
#if CONFIG_EXT_TX
435 436
  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
437 438 439 440
#endif  // CONFIG_EXT_TX
};

static const TX_TYPE_1D htx_tab[TX_TYPES] = {
441
  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
442
#if CONFIG_EXT_TX
443 444
  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
445 446
#endif  // CONFIG_EXT_TX
};
447

Yaowu Xu's avatar
Yaowu Xu committed
448
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
449 450 451 452 453
                                         uint8_t *src, int src_stride,
                                         uint8_t *dst, int dst_stride,
                                         double *hordist, double *verdist) {
  int bw = 4 << (b_width_log2_lookup[bsize]);
  int bh = 4 << (b_height_log2_lookup[bsize]);
454
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
455 456
  unsigned int var[16];
  double total = 0;
457

458
  const int f_index = bsize - BLOCK_16X16;
459 460 461 462
  if (f_index < 0) {
    int i, j, index;
    int w_shift = bw == 8 ? 1 : 2;
    int h_shift = bh == 8 ? 1 : 2;
Yaowu Xu's avatar
Yaowu Xu committed
463
#if CONFIG_AOM_HIGHBITDEPTH
464 465 466 467 468 469
    if (cpi->common.use_highbitdepth) {
      uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (i = 0; i < bh; ++i)
        for (j = 0; j < bw; ++j) {
          index = (j >> w_shift) + ((i >> h_shift) << 2);
470 471 472
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
473 474
        }
    } else {
Yaowu Xu's avatar
Yaowu Xu committed
475
#endif  // CONFIG_AOM_HIGHBITDEPTH
476 477 478 479 480 481 482

      for (i = 0; i < bh; ++i)
        for (j = 0; j < bw; ++j) {
          index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
Yaowu Xu's avatar
Yaowu Xu committed
483
#if CONFIG_AOM_HIGHBITDEPTH
484
    }
Yaowu Xu's avatar
Yaowu Xu committed
485
#endif  // CONFIG_AOM_HIGHBITDEPTH
486
  } else {
487 488 489 490 491
    var[0] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
    var[1] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[1]);
    var[2] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                     dst_stride, &esq[2]);
492 493 494 495 496
    var[3] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                     dst + 3 * bw / 4, dst_stride, &esq[3]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

497 498 499 500 501
    var[4] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
    var[5] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[5]);
    var[6] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                     dst_stride, &esq[6]);
502 503 504 505 506
    var[7] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                     dst + 3 * bw / 4, dst_stride, &esq[7]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

507 508 509 510 511
    var[8] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
    var[9] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[9]);
    var[10] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                      dst_stride, &esq[10]);
512 513 514 515 516
    var[11] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                      dst + 3 * bw / 4, dst_stride, &esq[11]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

517 518 519 520 521 522
    var[12] =
        cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
    var[13] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                      dst_stride, &esq[13]);
    var[14] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                      dst_stride, &esq[14]);
523 524 525 526
    var[15] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                      dst + 3 * bw / 4, dst_stride, &esq[15]);
  }

527 528 529
  total = esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] + esq[6] +
          esq[7] + esq[8] + esq[9] + esq[10] + esq[11] + esq[12] + esq[13] +
          esq[14] + esq[15];
530 531
  if (total > 0) {
    const double e_recip = 1.0 / total;
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
    hordist[0] =
        ((double)esq[0] + (double)esq[4] + (double)esq[8] + (double)esq[12]) *
        e_recip;
    hordist[1] =
        ((double)esq[1] + (double)esq[5] + (double)esq[9] + (double)esq[13]) *
        e_recip;
    hordist[2] =
        ((double)esq[2] + (double)esq[6] + (double)esq[10] + (double)esq[14]) *
        e_recip;
    verdist[0] =
        ((double)esq[0] + (double)esq[1] + (double)esq[2] + (double)esq[3]) *
        e_recip;
    verdist[1] =
        ((double)esq[4] + (double)esq[5] + (double)esq[6] + (double)esq[7]) *
        e_recip;
    verdist[2] =
        ((double)esq[8] + (double)esq[9] + (double)esq[10] + (double)esq[11]) *
        e_recip;
550 551 552 553 554
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
  (void)var[0];
  (void)var[1];
  (void)var[2];
  (void)var[3];
  (void)var[4];
  (void)var[5];
  (void)var[6];
  (void)var[7];
  (void)var[8];
  (void)var[9];
  (void)var[10];
  (void)var[11];
  (void)var[12];
  (void)var[13];
  (void)var[14];
  (void)var[15];
571 572
}

Yaowu Xu's avatar
Yaowu Xu committed
573 574 575
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize, uint8_t *src,
                            int src_stride, uint8_t *dst, int dst_stride,
                            double *hdist, double *vdist) {
576 577
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
578 579
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);
580

581
  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
582
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
583
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
599
static void get_horver_correlation(int16_t *diff, int stride, int w, int h,
600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627
                                   double *hcorr, double *vcorr) {
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
628 629 630
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
631 632 633 634 635 636 637 638 639 640 641 642
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

643 644
int dct_vs_idtx(int16_t *diff, int stride, int w, int h, double *hcorr,
                double *vcorr) {
645 646 647 648
  int prune_bitmask = 0;
  get_horver_correlation(diff, stride, w, h, hcorr, vcorr);

  if (*vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
649
    prune_bitmask |= 1 << IDTX_1D;
650 651 652 653
  else if (*vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << DCT_1D;

  if (*hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
654
    prune_bitmask |= 1 << (IDTX_1D + 8);
655 656 657 658 659 660
  else if (*hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xu's avatar
Yaowu Xu committed
661
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
662
                             MACROBLOCK *x, MACROBLOCKD *xd, int adst_flipadst,
663
                             int dct_idtx) {
664 665 666 667 668
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int bw = 4 << (b_width_log2_lookup[bs]);
  const int bh = 4 << (b_height_log2_lookup[bs]);
669
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
670
  double hcorr, vcorr;
671
  int prune = 0;
Yaowu Xu's avatar
Yaowu Xu committed
672
  av1_subtract_plane(x, bsize, 0);
673

674 675 676
  if (adst_flipadst)
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, hdist, vdist);
677
  if (dct_idtx) prune |= dct_vs_idtx(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
678 679 680

  return prune;
}
681 682
#endif  // CONFIG_EXT_TX

683
// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xu's avatar
Yaowu Xu committed
684
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
685
                             MACROBLOCK *x, MACROBLOCKD *xd) {
686 687
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
688
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
Yaowu Xu's avatar
Yaowu Xu committed
689
  av1_subtract_plane(x, bsize, 0);
690 691
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
                          pd->dst.stride, hdist, vdist);
692 693
}

Yaowu Xu's avatar
Yaowu Xu committed
694
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
695 696 697 698
                          MACROBLOCKD *xd, int tx_set) {
#if CONFIG_EXT_TX
  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
#else
699
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
700 701
#endif

702
  switch (cpi->sf.tx_type_search.prune_mode) {
703 704
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
705 706
      if ((tx_set >= 0) & !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
        return 0;
707 708
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
709 710
#if CONFIG_EXT_TX
    case PRUNE_TWO:
711
      if ((tx_set >= 0) & !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
712
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
713 714 715 716 717
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
      if ((tx_set >= 0) & !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
718
      break;
719
#endif
720 721 722 723 724
  }
  assert(0);
  return 0;
}

725
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
726 727 728
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
729
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
730 731
#else
  // temporary to avoid compiler warnings
732 733 734 735
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
736 737 738 739
  return 1;
#endif
}

Yaowu Xu's avatar
Yaowu Xu committed
740
static void model_rd_from_sse(const AV1_COMP *const cpi,
741 742
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
743 744 745
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
Yaowu Xu's avatar
Yaowu Xu committed
746
#if CONFIG_AOM_HIGHBITDEPTH
747
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
Yaowu Xu's avatar
Yaowu Xu committed
748
#endif  // CONFIG_AOM_HIGHBITDEPTH
749
                                                    3;
Geza Lore's avatar
Geza Lore committed
750 751 752 753 754 755 756

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
757
      *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xu's avatar
Yaowu Xu committed
758
                    (16 - AV1_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
759 760 761 762
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
Yaowu Xu's avatar
Yaowu Xu committed
763 764
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
765 766 767 768 769
  }

  *dist <<= 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
770
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
771 772 773 774
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
775 776 777
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
778 779 780
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
781 782 783 784 785 786
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
787 788 789
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
790 791
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

Geza Lore's avatar
Geza Lore committed
792 793 794
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
795

Geza Lore's avatar
Geza Lore committed
796 797
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
798 799
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
800

801
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
802

Geza Lore's avatar
Geza Lore committed
803
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
804

Geza Lore's avatar
Geza Lore committed
805
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
806 807 808

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
809 810
  }

Geza Lore's avatar
Geza Lore committed
811
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
812 813
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
814
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
815 816
}

Yaowu Xu's avatar
Yaowu Xu committed
817 818
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
Jingning Han's avatar
Jingning Han committed
819 820 821 822 823
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
824
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
825 826 827 828 829 830 831
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
832 833
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
Jingning Han's avatar
Jingning Han committed
834 835 836 837 838
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
839
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
840 841 842 843 844
  }

  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
845 846 847 848
#if CONFIG_AOM_HIGHBITDEPTH
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
Jingning Han's avatar
Jingning Han committed
849 850 851 852 853 854 855
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
856
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
857 858 859 860 861 862 863 864 865
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
Yaowu Xu's avatar
Yaowu Xu committed
866
#endif  // CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
867 868 869 870 871 872

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
873
static int cost_coeffs(MACROBLOCK *x, int plane, int block,
874 875 876
#if CONFIG_VAR_TX
                       int coeff_ctx,
#else
Jingning Han's avatar
Jingning Han committed
877
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
878
#endif
879
                       TX_SIZE tx_size, const int16_t *scan, const int16_t *nb,
Jingning Han's avatar
Jingning Han committed
880 881 882 883 884 885
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
886
  const uint16_t *band_count = &band_count_table[tx_size][1];
Jingning Han's avatar
Jingning Han committed
887 888
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
889
  const int tx_size_ctx = txsize_sqr_map[tx_size];
890 891
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
892
  uint8_t token_cache[MAX_TX_SQUARE];
893 894 895
#if CONFIG_VAR_TX
  int pt = coeff_ctx;
#else
Jingning Han's avatar
Jingning Han committed
896
  int pt = combine_entropy_contexts(*A, *L);
897
#endif
Jingning Han's avatar
Jingning Han committed
898
  int c, cost;
Yaowu Xu's avatar
Yaowu Xu committed
899 900
#if CONFIG_AOM_HIGHBITDEPTH
  const int *cat6_high_cost = av1_get_high_cost_table(xd->bd);
Jingning Han's avatar
Jingning Han committed
901
#else
Yaowu Xu's avatar
Yaowu Xu committed
902
  const int *cat6_high_cost = av1_get_high_cost_table(8);
Jingning Han's avatar
Jingning Han committed
903 904
#endif

905
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
906 907 908
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
909
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
910 911 912 913 914 915

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
Julia Robson's avatar
Julia Robson committed
916 917 918 919 920 921
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
Yaowu Xu's avatar
Yaowu Xu committed
922
      cost = av1_get_token_cost(v, &prev_t, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
923 924
      cost += (*token_costs)[0][pt][prev_t];

Yaowu Xu's avatar
Yaowu Xu committed
925
      token_cache[0] = av1_pt_energy_class[prev_t];
Julia Robson's avatar
Julia Robson committed
926 927 928 929 930 931 932 933
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
Yaowu Xu's avatar
Yaowu Xu committed
934
        cost += av1_get_token_cost(v, &t, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
935 936 937 938 939 940
        cost += (*token_costs)[!prev_t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
Jingning Han's avatar
Jingning Han committed
941 942
      }

Julia Robson's avatar
Julia Robson committed
943
      // eob token
944
      if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
Julia Robson's avatar
Julia Robson committed
945 946 947 948 949 950 951

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
952
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
Yaowu Xu's avatar
Yaowu Xu committed
953
      cost = av1_get_token_cost(v, &tok, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
954 955
      cost += (*token_costs)[0][pt][tok];

Yaowu Xu's avatar
Yaowu Xu committed
956
      token_cache[0] = av1_pt_energy_class[tok];
Julia Robson's avatar
Julia Robson committed
957 958 959 960 961 962 963 964 965
      ++token_costs;

      tok_cost_ptr = &((*token_costs)[!tok]);

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
Yaowu Xu's avatar
Yaowu Xu committed
966
        cost += av1_get_token_cost(v, &tok, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
967 968
        pt = get_coef_context(nb, token_cache, c);
        cost += (*tok_cost_ptr)[pt][tok];
Yaowu Xu's avatar
Yaowu Xu committed
969
        token_cache[rc] = av1_pt_energy_class[tok];
Julia Robson's avatar
Julia Robson committed
970 971 972 973 974 975 976 977 978
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
        tok_cost_ptr = &((*token_costs)[!tok]);
      }

      // eob token
      if (band_left) {
Jingning Han's avatar
Jingning Han committed
979 980 981 982 983 984
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

985
#if !CONFIG_VAR_TX
Jingning Han's avatar
Jingning Han committed
986 987
  // is eob first coefficient;
  *A = *L = (c > 0);
988
#endif
Jingning Han's avatar
Jingning Han committed
989 990 991 992

  return cost;
}

Yaowu Xu's avatar
Yaowu Xu committed
993 994
static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
                       int blk_row, int blk_col, TX_SIZE tx_size,
Jingning Han's avatar
Jingning Han committed
995
                       int64_t *out_dist, int64_t *out_sse) {
996
  MACROBLOCKD *const xd = &x->e_mbd;
Alex Converse's avatar
Alex Converse committed
997 998
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
999 1000 1001
  if (cpi->sf.use_transform_domain_distortion) {
    // Transform domain distortion computation is more accurate as it does
    // not involve an inverse transform, but it is less accurate.
1002
    const int ss_txfrm_size = num_4x4_blocks_txsize_log2_lookup[tx_size];
1003
    int64_t this_sse;
Angie Chiang's avatar
Angie Chiang committed
1004 1005
    int tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
    int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
1006 1007
    tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Yaowu Xu's avatar
Yaowu Xu committed
1008
#if CONFIG_AOM_HIGHBITDEPTH
1009
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
Yaowu Xu's avatar
Yaowu Xu committed
1010 1011
    *out_dist = av1_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
                                       &this_sse, bd) >>
1012
                shift;
1013
#else
1014
    *out_dist =
Yaowu Xu's avatar
Yaowu Xu committed
1015
        av1_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >>
1016
        shift;
Yaowu Xu's avatar
Yaowu Xu committed
1017
#endif  // CONFIG_AOM_HIGHBITDEPTH
1018 1019 1020
    *out_sse = this_sse >> shift;
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
1021 1022
    const int bsw = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
    const int bsh = 4 * num_4x4_blocks_high_lookup[tx_bsize];
1023 1024 1025 1026 1027 1028 1029
    const int src_stride = x->plane[plane].src.stride;
    const int dst_stride = xd->plane[plane].dst.stride;
    const int src_idx = 4 * (blk_row * src_stride + blk_col);
    const int dst_idx = 4 * (blk_row * dst_stride + blk_col);
    const uint8_t *src = &x->plane[plane].src.buf[src_idx];
    const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Alex Converse's avatar
Alex Converse committed
1030
    const uint16_t eob = p->eobs[block];
1031 1032 1033 1034 1035 1036 1037 1038

    unsigned int tmp;

    assert(cpi != NULL);

    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
    *out_sse = (int64_t)tmp * 16;

Alex Converse's avatar
Alex Converse committed
1039
    if (eob) {
1040
      const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
1041
#if CONFIG_AOM_HIGHBITDEPTH
1042
      DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
1043
      uint8_t *recon = (uint8_t *)recon16;
Jingning Han's avatar
Jingning Han committed
1044
#else
1045
      DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
Yaowu Xu's avatar
Yaowu Xu committed
1046
#endif  // CONFIG_AOM_HIGHBITDEPTH
1047 1048 1049 1050 1051 1052 1053

      const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV;

      INV_TXFM_PARAM inv_txfm_param;

      inv_txfm_param.tx_type = get_tx_type(plane_type, xd, block, tx_size);
      inv_txfm_param.tx_size = tx_size;
Alex Converse's avatar
Alex Converse committed
1054
      inv_txfm_param.eob = eob;
1055 1056
      inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];

Yaowu Xu's avatar
Yaowu Xu committed
1057
#if CONFIG_AOM_HIGHBITDEPTH
1058 1059 1060
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        recon = CONVERT_TO_BYTEPTR(recon);
        inv_txfm_param.bd = xd->bd;
Yaowu Xu's avatar
Yaowu Xu committed
1061
        aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
1062
                                 NULL, 0, bsw, bsh, xd->bd);
1063
        highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
1064
      } else
Yaowu Xu's avatar
Yaowu Xu committed
1065
#endif  // CONFIG_AOM_HIGHBITDEPTH
1066
      {
Yaowu Xu's avatar
Yaowu Xu committed
1067
        aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL, 0,
1068
                          bsw, bsh);
1069
        inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
1070 1071
      }

1072
      cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &tmp);
1073 1074 1075 1076
    }

    *out_dist = (int64_t)tmp * 16;
  }
Jingning Han's avatar
Jingning Han committed
1077 1078
}

1079
static int rate_block(int plane, int block, int blk_row, int blk_col,
1080
                      TX_SIZE tx_size, struct rdcost_block_args *args) {
1081 1082 1083
#if CONFIG_VAR_TX
  int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
                                           *(args->t_left + blk_row));
1084 1085 1086
  int coeff_cost =
      cost_coeffs(args->x, plane, block, coeff_ctx, tx_size, args->so->scan,
                  args->so->neighbors, args->use_fast_coef_costing);
1087 1088
  const struct macroblock_plane *p = &args->x->plane[plane];
  *(args->t_above + blk_col) = !(p->eobs[block] == 0);
1089
  *(args->t_left + blk_row) = !(p->eobs[block] == 0);
1090 1091
  return coeff_cost;
#else
1092 1093 1094
  return cost_coeffs(args->x, plane, block, args->t_above + blk_col,
                     args->t_left + blk_row, tx_size, args->so->scan,
                     args->so->neighbors, args->use_fast_coef_costing);
1095
#endif  // CONFIG_VAR_TX
Jingning Han's avatar
Jingning Han committed
1096 1097
}

1098 1099 1100 1101 1102 1103
static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
                               TX_SIZE tx_size) {
  uint64_t sse;
  switch (tx_size) {
#if CONFIG_EXT_TX
    case TX_4X8:
Yaowu Xu's avatar
Yaowu Xu committed
1104 1105
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 4) +
            aom_sum_squares_2d_i16(diff + 4 * diff_stride, diff_stride, 4);
1106 1107
      break;
    case TX_8X4:
Yaowu Xu's avatar
Yaowu Xu committed
1108 1109
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 4) +
            aom_sum_squares_2d_i16(diff + 4, diff_stride, 4);
1110
      break;
1111
    case TX_8X16:
Yaowu Xu's avatar
Yaowu Xu committed
1112 1113
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 8) +
            aom_sum_squares_2d_i16(diff + 8 * diff_stride, diff_stride, 8);
1114 1115
      break;
    case TX_16X8:
Yaowu Xu's avatar
Yaowu Xu committed
1116 1117
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 8) +
            aom_sum_squares_2d_i16(diff + 8, diff_stride, 8);
1118 1119
      break;
    case TX_16X32: