rdopt.c 405 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13 14
 */

#include <assert.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
16
#include "./av1_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_mem/aom_mem.h"
21 22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

24 25 26 27 28 29 30 31 32 33 34 35
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
Jingning Han's avatar
Jingning Han committed
36

Jingning Han's avatar
Jingning Han committed
37
#include "av1/encoder/aq_variance.h"
38 39 40 41 42 43
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
44
#if CONFIG_PALETTE
45
#include "av1/encoder/palette.h"
46
#endif  // CONFIG_PALETTE
47 48 49 50
#include "av1/encoder/quantize.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
51
#include "av1/encoder/tokenize.h"
52 53 54
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
#endif
55 56 57
#if CONFIG_DUAL_FILTER
#if CONFIG_EXT_INTERP
static const int filter_sets[25][2] = {
58 59 60 61
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 0, 4 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 1, 4 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 2, 4 }, { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }, { 3, 4 }, { 4, 0 },
  { 4, 1 }, { 4, 2 }, { 4, 3 }, { 4, 4 },
62 63 64
};
#else
static const int filter_sets[9][2] = {
65 66
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
67 68 69 70
};
#endif
#endif

71

72 73
#if CONFIG_EXT_REFS

74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
Jingning Han's avatar
Jingning Han committed
101

102 103
#endif  // CONFIG_EXT_REFS

104
#if CONFIG_EXT_REFS
105
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
106
#else
107
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
108
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
109

110 111
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
112

113 114 115 116 117 118
#if CONFIG_EXT_INTRA
#define ANGLE_FAST_SEARCH 1
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

119 120
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
121

Jingning Han's avatar
Jingning Han committed
122 123 124 125 126
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

127
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
128 129

struct rdcost_block_args {
Yaowu Xu's avatar
Yaowu Xu committed
130
  const AV1_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
131
  MACROBLOCK *x;
132 133
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
Jingning Han's avatar
Jingning Han committed
134 135 136 137 138 139 140
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
141
  const SCAN_ORDER *scan_order;
Jingning Han's avatar
Jingning Han committed
142 143 144 145
  uint8_t skippable;
};

#define LAST_NEW_MV_INDEX 6
Yaowu Xu's avatar
Yaowu Xu committed
146
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
147
  { NEARESTMV, { LAST_FRAME, NONE } },
148
#if CONFIG_EXT_REFS
149 150 151
  { NEARESTMV, { LAST2_FRAME, NONE } },
  { NEARESTMV, { LAST3_FRAME, NONE } },
  { NEARESTMV, { BWDREF_FRAME, NONE } },
152
#endif  // CONFIG_EXT_REFS
153 154
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
155

156
  { DC_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
157

158
  { NEWMV, { LAST_FRAME, NONE } },
159
#if CONFIG_EXT_REFS
160 161 162
  { NEWMV, { LAST2_FRAME, NONE } },
  { NEWMV, { LAST3_FRAME, NONE } },
  { NEWMV, { BWDREF_FRAME, NONE } },
163
#endif  // CONFIG_EXT_REFS
164 165
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
166

167
  { NEARMV, { LAST_FRAME, NONE } },
168
#if CONFIG_EXT_REFS
169 170 171
  { NEARMV, { LAST2_FRAME, NONE } },
  { NEARMV, { LAST3_FRAME, NONE } },
  { NEARMV, { BWDREF_FRAME, NONE } },
172
#endif  // CONFIG_EXT_REFS
173 174
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
175

Yue Chen's avatar
Yue Chen committed
176
#if CONFIG_EXT_INTER
177
  { NEWFROMNEARMV, { LAST_FRAME, NONE } },
178
#if CONFIG_EXT_REFS
179 180 181
  { NEWFROMNEARMV, { LAST2_FRAME, NONE } },
  { NEWFROMNEARMV, { LAST3_FRAME, NONE } },
  { NEWFROMNEARMV, { BWDREF_FRAME, NONE } },
182
#endif  // CONFIG_EXT_REFS
183 184
  { NEWFROMNEARMV, { ALTREF_FRAME, NONE } },
  { NEWFROMNEARMV, { GOLDEN_FRAME, NONE } },
Yue Chen's avatar
Yue Chen committed
185 186
#endif  // CONFIG_EXT_INTER

187
  { ZEROMV, { LAST_FRAME, NONE } },
188
#if CONFIG_EXT_REFS
189 190 191
  { ZEROMV, { LAST2_FRAME, NONE } },
  { ZEROMV, { LAST3_FRAME, NONE } },
  { ZEROMV, { BWDREF_FRAME, NONE } },
192
#endif  // CONFIG_EXT_REFS
193 194
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
195

196
// TODO(zoeliu): May need to reconsider the order on the modes to check
197

198
#if CONFIG_EXT_INTER
199
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
200
#if CONFIG_EXT_REFS
201 202
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
203
#endif  // CONFIG_EXT_REFS
204
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
205
#if CONFIG_EXT_REFS
206 207 208 209
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
210
#endif  // CONFIG_EXT_REFS
211

212
#else  // CONFIG_EXT_INTER
213

214
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
215
#if CONFIG_EXT_REFS
216 217
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
218
#endif  // CONFIG_EXT_REFS
219
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
220
#if CONFIG_EXT_REFS
221 222 223 224
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
225
#endif  // CONFIG_EXT_REFS
226
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
227

228
  { TM_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
229

230
#if CONFIG_EXT_INTER
231 232 233 234 235 236 237 238 239
  { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
240

241
#if CONFIG_EXT_REFS
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
  { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
261
#endif  // CONFIG_EXT_REFS
262

263 264 265 266 267 268 269 270 271
  { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
272 273

#if CONFIG_EXT_REFS
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
  { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
313
#endif  // CONFIG_EXT_REFS
314 315 316

#else  // CONFIG_EXT_INTER

317 318
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
319
#if CONFIG_EXT_REFS
320 321 322 323
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
324
#endif  // CONFIG_EXT_REFS
325 326
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
327 328

#if CONFIG_EXT_REFS
329 330 331 332 333 334 335 336
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
337
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
338

339
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
340
#if CONFIG_EXT_REFS
341 342
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
343
#endif  // CONFIG_EXT_REFS
344
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
345 346

#if CONFIG_EXT_REFS
347 348 349 350
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
351
#endif  // CONFIG_EXT_REFS
352

353
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
354

355 356 357 358 359 360 361 362
  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
363 364

#if CONFIG_EXT_INTER
365 366 367 368
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
369 370

#if CONFIG_EXT_REFS
371 372 373 374 375 376 377 378 379
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
380 381
#endif  // CONFIG_EXT_REFS

382 383 384 385
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
386

387
#if CONFIG_EXT_REFS
388 389 390 391
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
392 393
#endif  // CONFIG_EXT_REFS

394 395 396 397
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
398
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
399 400
};

Yaowu Xu's avatar
Yaowu Xu committed
401
static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
402
  { { LAST_FRAME, NONE } },
403
#if CONFIG_EXT_REFS
404 405
  { { LAST2_FRAME, NONE } },          { { LAST3_FRAME, NONE } },
  { { BWDREF_FRAME, NONE } },
406
#endif  // CONFIG_EXT_REFS
407
  { { GOLDEN_FRAME, NONE } },         { { ALTREF_FRAME, NONE } },
408

409
  { { LAST_FRAME, ALTREF_FRAME } },
410
#if CONFIG_EXT_REFS
411
  { { LAST2_FRAME, ALTREF_FRAME } },  { { LAST3_FRAME, ALTREF_FRAME } },
412
#endif  // CONFIG_EXT_REFS
413
  { { GOLDEN_FRAME, ALTREF_FRAME } },
414 415

#if CONFIG_EXT_REFS
416 417
  { { LAST_FRAME, BWDREF_FRAME } },   { { LAST2_FRAME, BWDREF_FRAME } },
  { { LAST3_FRAME, BWDREF_FRAME } },  { { GOLDEN_FRAME, BWDREF_FRAME } },
418
#endif  // CONFIG_EXT_REFS
419

420
  { { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
421 422
};

423
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
424 425
static INLINE int write_uniform_cost(int n, int v) {
  int l = get_unsigned_bits(n), m = (1 << l) - n;
426
  if (l == 0) return 0;
hui su's avatar
hui su committed
427
  if (v < m)
Yaowu Xu's avatar
Yaowu Xu committed
428
    return (l - 1) * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
429
  else
Yaowu Xu's avatar
Yaowu Xu committed
430
    return l * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
431
}
432
#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
433

434 435 436
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
437 438 439 440
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
441
  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
442
#if CONFIG_EXT_TX
443 444
  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
445 446 447 448
#endif  // CONFIG_EXT_TX
};

static const TX_TYPE_1D htx_tab[TX_TYPES] = {
449
  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
450
#if CONFIG_EXT_TX
451 452
  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
453 454
#endif  // CONFIG_EXT_TX
};
455

Yaowu Xu's avatar
Yaowu Xu committed
456
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
457 458 459 460 461
                                         uint8_t *src, int src_stride,
                                         uint8_t *dst, int dst_stride,
                                         double *hordist, double *verdist) {
  int bw = 4 << (b_width_log2_lookup[bsize]);
  int bh = 4 << (b_height_log2_lookup[bsize]);
462
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
463 464
  unsigned int var[16];
  double total = 0;
465

466
  const int f_index = bsize - BLOCK_16X16;
467 468 469 470
  if (f_index < 0) {
    int i, j, index;
    int w_shift = bw == 8 ? 1 : 2;
    int h_shift = bh == 8 ? 1 : 2;
Yaowu Xu's avatar
Yaowu Xu committed
471
#if CONFIG_AOM_HIGHBITDEPTH
472 473 474 475 476 477
    if (cpi->common.use_highbitdepth) {
      uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (i = 0; i < bh; ++i)
        for (j = 0; j < bw; ++j) {
          index = (j >> w_shift) + ((i >> h_shift) << 2);
478 479 480
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
481 482
        }
    } else {
Yaowu Xu's avatar
Yaowu Xu committed
483
#endif  // CONFIG_AOM_HIGHBITDEPTH
484 485 486 487 488 489 490

      for (i = 0; i < bh; ++i)
        for (j = 0; j < bw; ++j) {
          index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
Yaowu Xu's avatar
Yaowu Xu committed
491
#if CONFIG_AOM_HIGHBITDEPTH
492
    }
Yaowu Xu's avatar
Yaowu Xu committed
493
#endif  // CONFIG_AOM_HIGHBITDEPTH
494
  } else {
495 496 497 498 499
    var[0] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
    var[1] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[1]);
    var[2] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                     dst_stride, &esq[2]);
500 501 502 503 504
    var[3] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                     dst + 3 * bw / 4, dst_stride, &esq[3]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

505 506 507 508 509
    var[4] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
    var[5] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[5]);
    var[6] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                     dst_stride, &esq[6]);
510 511 512 513 514
    var[7] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                     dst + 3 * bw / 4, dst_stride, &esq[7]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

515 516 517 518 519
    var[8] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
    var[9] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[9]);
    var[10] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                      dst_stride, &esq[10]);
520 521 522 523 524
    var[11] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                      dst + 3 * bw / 4, dst_stride, &esq[11]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

525 526 527 528 529 530
    var[12] =
        cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
    var[13] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                      dst_stride, &esq[13]);
    var[14] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                      dst_stride, &esq[14]);
531 532 533 534
    var[15] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                      dst + 3 * bw / 4, dst_stride, &esq[15]);
  }

535 536 537
  total = esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] + esq[6] +
          esq[7] + esq[8] + esq[9] + esq[10] + esq[11] + esq[12] + esq[13] +
          esq[14] + esq[15];
538 539
  if (total > 0) {
    const double e_recip = 1.0 / total;
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
    hordist[0] =
        ((double)esq[0] + (double)esq[4] + (double)esq[8] + (double)esq[12]) *
        e_recip;
    hordist[1] =
        ((double)esq[1] + (double)esq[5] + (double)esq[9] + (double)esq[13]) *
        e_recip;
    hordist[2] =
        ((double)esq[2] + (double)esq[6] + (double)esq[10] + (double)esq[14]) *
        e_recip;
    verdist[0] =
        ((double)esq[0] + (double)esq[1] + (double)esq[2] + (double)esq[3]) *
        e_recip;
    verdist[1] =
        ((double)esq[4] + (double)esq[5] + (double)esq[6] + (double)esq[7]) *
        e_recip;
    verdist[2] =
        ((double)esq[8] + (double)esq[9] + (double)esq[10] + (double)esq[11]) *
        e_recip;
558 559 560 561 562
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578
  (void)var[0];
  (void)var[1];
  (void)var[2];
  (void)var[3];
  (void)var[4];
  (void)var[5];
  (void)var[6];
  (void)var[7];
  (void)var[8];
  (void)var[9];
  (void)var[10];
  (void)var[11];
  (void)var[12];
  (void)var[13];
  (void)var[14];
  (void)var[15];
579 580
}

Yaowu Xu's avatar
Yaowu Xu committed
581 582 583
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize, uint8_t *src,
                            int src_stride, uint8_t *dst, int dst_stride,
                            double *hdist, double *vdist) {
584 585
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
586 587
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);
588

589
  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
590
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
591
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
607
static void get_horver_correlation(int16_t *diff, int stride, int w, int h,
608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635
                                   double *hcorr, double *vcorr) {
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
636 637 638
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
639 640 641 642 643 644 645 646 647 648 649 650
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

651 652
int dct_vs_idtx(int16_t *diff, int stride, int w, int h, double *hcorr,
                double *vcorr) {
653 654 655 656
  int prune_bitmask = 0;
  get_horver_correlation(diff, stride, w, h, hcorr, vcorr);

  if (*vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
657
    prune_bitmask |= 1 << IDTX_1D;
658 659 660 661
  else if (*vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << DCT_1D;

  if (*hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
662
    prune_bitmask |= 1 << (IDTX_1D + 8);
663 664 665 666 667 668
  else if (*hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xu's avatar
Yaowu Xu committed
669
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
670
                             MACROBLOCK *x, MACROBLOCKD *xd, int adst_flipadst,
671
                             int dct_idtx) {
672 673 674 675 676
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int bw = 4 << (b_width_log2_lookup[bs]);
  const int bh = 4 << (b_height_log2_lookup[bs]);
677
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
678
  double hcorr, vcorr;
679
  int prune = 0;
Yaowu Xu's avatar
Yaowu Xu committed
680
  av1_subtract_plane(x, bsize, 0);
681

682 683 684
  if (adst_flipadst)
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, hdist, vdist);
685
  if (dct_idtx) prune |= dct_vs_idtx(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
686 687 688

  return prune;
}
689 690
#endif  // CONFIG_EXT_TX

691
// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xu's avatar
Yaowu Xu committed
692
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
693
                             MACROBLOCK *x, MACROBLOCKD *xd) {
694 695
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
696
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
Yaowu Xu's avatar
Yaowu Xu committed
697
  av1_subtract_plane(x, bsize, 0);
698 699
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
                          pd->dst.stride, hdist, vdist);
700 701
}

Yaowu Xu's avatar
Yaowu Xu committed
702
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
703 704 705 706
                          MACROBLOCKD *xd, int tx_set) {
#if CONFIG_EXT_TX
  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
#else
707
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
708 709
#endif

710
  switch (cpi->sf.tx_type_search.prune_mode) {
711 712
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
713
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
714
        return 0;
715 716
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
717 718
#if CONFIG_EXT_TX
    case PRUNE_TWO:
719
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
720
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
721 722
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
723
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
724 725
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
726
      break;
727
#endif
728 729 730 731 732
  }
  assert(0);
  return 0;
}

733
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
734 735 736
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
737
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
738 739
#else
  // temporary to avoid compiler warnings
740 741 742 743
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
744 745 746 747
  return 1;
#endif
}

Yaowu Xu's avatar
Yaowu Xu committed
748
static void model_rd_from_sse(const AV1_COMP *const cpi,
749 750
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
751 752 753
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
Yaowu Xu's avatar
Yaowu Xu committed
754
#if CONFIG_AOM_HIGHBITDEPTH
755
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
Yaowu Xu's avatar
Yaowu Xu committed
756
#endif  // CONFIG_AOM_HIGHBITDEPTH
757
                                                    3;
Geza Lore's avatar
Geza Lore committed
758 759 760 761 762 763 764

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
765
      *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xu's avatar
Yaowu Xu committed
766
                    (16 - AV1_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
767 768 769 770
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
Yaowu Xu's avatar
Yaowu Xu committed
771 772
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
773 774 775 776 777
  }

  *dist <<= 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
778
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
779 780 781 782
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
783 784 785
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
786 787 788
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
789 790 791 792 793 794
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
795 796 797
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
798 799
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

Geza Lore's avatar
Geza Lore committed
800 801 802
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
803

Geza Lore's avatar
Geza Lore committed
804 805
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
806 807
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
808

809
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
810

Geza Lore's avatar
Geza Lore committed
811
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
812

Geza Lore's avatar
Geza Lore committed
813
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
814 815 816

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
817 818
  }

Geza Lore's avatar
Geza Lore committed
819
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
820 821
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
822
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
823 824
}

825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() return two kind of errors,
// 1) reconstruction (i.e. decoded) error and
// 2) Squared sum of transformed residue (i.e. 'coeff')
// However, if PVQ is enabled, coeff does not keep the transformed residue
// but instead a transformed original is kept.
// Hence, new parameter ref vector (i.e. transformed predicted signal)
// is required to derive the residue signal,
// i.e. coeff - ref = residue (all transformed).

// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
int64_t av1_block_error2_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           const tran_low_t *ref, intptr_t block_size,
                           int64_t *ssz) {
  int64_t error;

  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  error = av1_block_error_fp(coeff, dqcoeff, block_size);
  // prediction residue^2 = (orig - ref)^2
  *ssz = av1_block_error_fp(coeff, ref, block_size);

  return error;
}
#endif

Yaowu Xu's avatar
Yaowu Xu committed
852 853
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
Jingning Han's avatar
Jingning Han committed
854 855 856 857 858
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
859
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
860 861 862 863 864 865 866
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
867 868
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
Jingning Han's avatar
Jingning Han committed
869 870 871 872 873
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
874
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
875 876 877 878 879
  }

  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
880 881 882 883
#if CONFIG_AOM_HIGHBITDEPTH
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
Jingning Han's avatar
Jingning Han committed
884 885 886 887 888 889 890
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
891
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
892 893 894 895 896 897 898 899 900
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
Yaowu Xu's avatar
Yaowu Xu committed
901
#endif  // CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
902

903
#if !CONFIG_PVQ
904
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
Jingning Han's avatar
Jingning Han committed
905 906 907 908
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
909 910 911
int av1_cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                    int block, int coeff_ctx, TX_SIZE tx_size,
                    const int16_t *scan, const int16_t *nb,
912
                    int use_fast_coef_costing) {
Jingning Han's avatar
Jingning Han committed
913 914 915 916 917
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
918
  const uint16_t *band_count = &band_count_table[tx_size][1];
Jingning Han's avatar
Jingning Han committed
919 920
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
921
  const int tx_size_ctx = txsize_sqr_map[tx_size];
922 923
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
924
  uint8_t token_cache[MAX_TX_SQUARE];
925
  int pt = coeff_ctx;
Jingning Han's avatar
Jingning Han committed
926
  int c, cost;
Yaowu Xu's avatar
Yaowu Xu committed
927 928
#if CONFIG_AOM_HIGHBITDEPTH
  const int *cat6_high_cost = av1_get_high_cost_table(xd->bd);
Jingning Han's avatar
Jingning Han committed
929
#else
Yaowu Xu's avatar
Yaowu Xu committed
930
  const int *cat6_high_cost = av1_get_high_cost_table(8);
Jingning Han's avatar
Jingning Han committed
931 932
#endif

933
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
934 935 936
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
937
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
938
  (void)cm;
Jingning Han's avatar
Jingning Han committed
939 940 941 942 943 944

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
Julia Robson's avatar
Julia Robson committed
945 946 947 948 949 950
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
Yaowu Xu's avatar
Yaowu Xu committed
951
      cost = av1_get_token_cost(v, &prev_t, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
952 953
      cost += (*token_costs)[0][pt][prev_t];

Yaowu Xu's avatar
Yaowu Xu committed
954
      token_cache[0] = av1_pt_energy_class[prev_t];
Julia Robson's avatar
Julia Robson committed
955 956 957 958 959 960 961 962
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
Yaowu Xu's avatar
Yaowu Xu committed
963
        cost += av1_get_token_cost(v, &t, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
964 965 966 967 968 969
        cost += (*token_costs)[!prev_t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
Jingning Han's avatar
Jingning Han committed
970 971
      }

Julia Robson's avatar
Julia Robson committed
972
      // eob token
973
      if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
Julia Robson's avatar
Julia Robson committed
974 975 976 977 978 979 980