rdopt.c 423 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13 14
 */

#include <assert.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
16
#include "./av1_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_mem/aom_mem.h"
21 22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

24 25 26 27 28 29 30 31 32 33 34 35
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
36 37 38
#if CONFIG_LV_MAP
#include "av1/common/txb_common.h"
#endif
Yue Chen's avatar
Yue Chen committed
39 40 41
#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
#endif  // CONFIG_WARPED_MOTION
Jingning Han's avatar
Jingning Han committed
42

Jingning Han's avatar
Jingning Han committed
43
#include "av1/encoder/aq_variance.h"
44
#include "av1/encoder/av1_quantize.h"
45 46 47 48
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
49 50 51
#if CONFIG_LV_MAP
#include "av1/encoder/encodetxb.h"
#endif
52 53
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
54
#if CONFIG_PALETTE
55
#include "av1/encoder/palette.h"
56
#endif  // CONFIG_PALETTE
57 58 59
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
60
#include "av1/encoder/tokenize.h"
61 62
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
63
#endif  // CONFIG_PVQ
Yushin Cho's avatar
Yushin Cho committed
64 65
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
66
#endif  // CONFIG_PVQ || CONFIG_DAALA_DIST
67
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
68
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
69
#if USE_EXTRA_FILTER
Angie Chiang's avatar
Angie Chiang committed
70
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
71 72 73
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
74
};
75 76 77 78 79 80
#else   // USE_EXTRA_FILTER
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
};
#endif  // USE_EXTRA_FILTER
Angie Chiang's avatar
Angie Chiang committed
81
#endif  // CONFIG_DUAL_FILTER
82

83 84
#if CONFIG_EXT_REFS

85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
Jingning Han's avatar
Jingning Han committed
112

113 114
#endif  // CONFIG_EXT_REFS

115
#if CONFIG_EXT_REFS
116
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
117
#else
118
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
119
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
120

121 122
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
123

124 125 126 127 128
#if CONFIG_EXT_INTRA
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

129 130
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
131

Jingning Han's avatar
Jingning Han committed
132 133 134 135 136
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

137
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
138 139

struct rdcost_block_args {
Yaowu Xu's avatar
Yaowu Xu committed
140
  const AV1_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
141
  MACROBLOCK *x;
142 143
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
144
  RD_STATS rd_stats;
Jingning Han's avatar
Jingning Han committed
145 146 147 148 149 150 151
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
};

#define LAST_NEW_MV_INDEX 6
Yaowu Xu's avatar
Yaowu Xu committed
152
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
Emil Keyder's avatar
Emil Keyder committed
153
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
154
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
155 156 157
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
158
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
159 160
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
161

Emil Keyder's avatar
Emil Keyder committed
162
  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
163

Emil Keyder's avatar
Emil Keyder committed
164
  { NEWMV, { LAST_FRAME, NONE_FRAME } },
165
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
166 167 168
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
169
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
170 171
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
172

Emil Keyder's avatar
Emil Keyder committed
173
  { NEARMV, { LAST_FRAME, NONE_FRAME } },
174
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
175 176 177
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
178
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
179 180
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
181

Emil Keyder's avatar
Emil Keyder committed
182
  { ZEROMV, { LAST_FRAME, NONE_FRAME } },
183
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
184 185 186
  { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
  { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
  { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
187
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
188 189
  { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
  { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
190

191
// TODO(zoeliu): May need to reconsider the order on the modes to check
192

193
#if CONFIG_EXT_INTER
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243

#if CONFIG_COMPOUND_SINGLEREF
  // Single ref comp mode
  { SR_NEAREST_NEARMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_NEAREST_NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEARMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_NEAREST_NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEARMV, { ALTREF_FRAME, NONE_FRAME } },

  /*
  { SR_NEAREST_NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_NEAREST_NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_NEAREST_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEWMV, { ALTREF_FRAME, NONE_FRAME } },*/

  { SR_NEAR_NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_NEAR_NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_NEAR_NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_NEAR_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_NEAR_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_NEAR_NEWMV, { ALTREF_FRAME, NONE_FRAME } },

  { SR_ZERO_NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_ZERO_NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_ZERO_NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_ZERO_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_ZERO_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_ZERO_NEWMV, { ALTREF_FRAME, NONE_FRAME } },

  { SR_NEW_NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_NEW_NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_NEW_NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_NEW_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_NEW_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_NEW_NEWMV, { ALTREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_COMPOUND_SINGLEREF

244
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
245
#if CONFIG_EXT_REFS
246 247
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
248
#endif  // CONFIG_EXT_REFS
249
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
250
#if CONFIG_EXT_REFS
251 252 253 254
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
255
#endif  // CONFIG_EXT_REFS
256

257
#else  // CONFIG_EXT_INTER
258

259
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
260
#if CONFIG_EXT_REFS
261 262
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
263
#endif  // CONFIG_EXT_REFS
264
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
265
#if CONFIG_EXT_REFS
266 267 268 269
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
270
#endif  // CONFIG_EXT_REFS
271
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
272

Emil Keyder's avatar
Emil Keyder committed
273
  { TM_PRED, { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
274

275
#if CONFIG_ALT_INTRA
Emil Keyder's avatar
Emil Keyder committed
276
  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
Urvang Joshi's avatar
Urvang Joshi committed
277 278 279 280
#if CONFIG_SMOOTH_HV
  { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
#endif  // CONFIG_SMOOTH_HV
281 282
#endif  // CONFIG_ALT_INTRA

283
#if CONFIG_EXT_INTER
284 285 286 287 288 289 290
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
291

292
#if CONFIG_EXT_REFS
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
308
#endif  // CONFIG_EXT_REFS
309

310 311 312 313 314 315 316
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
317 318

#if CONFIG_EXT_REFS
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
350
#endif  // CONFIG_EXT_REFS
351 352 353

#else  // CONFIG_EXT_INTER

354 355
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
356
#if CONFIG_EXT_REFS
357 358 359 360
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
361
#endif  // CONFIG_EXT_REFS
362 363
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
364 365

#if CONFIG_EXT_REFS
366 367 368 369 370 371 372 373
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
374
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
375

376
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
377
#if CONFIG_EXT_REFS
378 379
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
380
#endif  // CONFIG_EXT_REFS
381
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
382 383

#if CONFIG_EXT_REFS
384 385 386 387
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
388
#endif  // CONFIG_EXT_REFS
389

390
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
391

Emil Keyder's avatar
Emil Keyder committed
392 393 394 395 396 397 398 399
  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
400 401

#if CONFIG_EXT_INTER
402 403 404 405
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
406 407

#if CONFIG_EXT_REFS
408 409 410 411 412 413 414 415 416
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
417 418
#endif  // CONFIG_EXT_REFS

419 420 421 422
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
423

424
#if CONFIG_EXT_REFS
425 426 427 428
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
429 430
#endif  // CONFIG_EXT_REFS

431 432 433 434
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
435
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
436 437
};

438
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
439
static INLINE int write_uniform_cost(int n, int v) {
440 441
  const int l = get_unsigned_bits(n);
  const int m = (1 << l) - n;
442
  if (l == 0) return 0;
hui su's avatar
hui su committed
443
  if (v < m)
Yaowu Xu's avatar
Yaowu Xu committed
444
    return (l - 1) * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
445
  else
Yaowu Xu's avatar
Yaowu Xu committed
446
    return l * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
447
}
448
#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
449

450 451 452
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
453 454 455
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

Yushin Cho's avatar
Yushin Cho committed
456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
#if CONFIG_DAALA_DIST
static int od_compute_var_4x4(od_coeff *x, int stride) {
  int sum;
  int s2;
  int i;
  sum = 0;
  s2 = 0;
  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < 4; j++) {
      int t;

      t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }
  // TODO(yushin) : Check wheter any changes are required for high bit depth.
  return (s2 - (sum * sum >> 4)) >> 4;
}

477 478 479 480 481 482 483
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)

Yushin Cho's avatar
Yushin Cho committed
484
static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
485
                                  od_coeff *y, od_coeff *e_lp, int stride) {
Yushin Cho's avatar
Yushin Cho committed
486 487 488 489 490 491 492 493 494 495 496 497
  double sum;
  int min_var;
  double mean_var;
  double var_stat;
  double activity;
  double calibration;
  int i;
  int j;
  double vardist;

  vardist = 0;
  OD_ASSERT(qm != OD_FLAT_QM);
498
  (void)qm;
Yushin Cho's avatar
Yushin Cho committed
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
#if 1
  min_var = INT_MAX;
  mean_var = 0;
  for (i = 0; i < 3; i++) {
    for (j = 0; j < 3; j++) {
      int varx;
      int vary;
      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
      min_var = OD_MINI(min_var, varx);
      mean_var += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* We use a different variance statistic depending on whether activity
     masking is used, since the harmonic mean appeared slghtly worse with
     masking off. The calibration constant just ensures that we preserve the
     rate compared to activity=1. */
  if (use_activity_masking) {
    calibration = 1.95;
    var_stat = 9. / mean_var;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
     activity masking constant. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
  activity = 1;
530
#endif  // 1
Yushin Cho's avatar
Yushin Cho committed
531 532 533
  sum = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++)
534
      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
Yushin Cho's avatar
Yushin Cho committed
535
  }
536 537 538
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
Yushin Cho's avatar
Yushin Cho committed
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
  return activity * activity * (sum + vardist);
}

// Note : Inputs x and y are in a pixel domain
static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
                              od_coeff *y, int bsize_w, int bsize_h,
                              int qindex) {
  int i;
  double sum;
  sum = 0;

  assert(bsize_w >= 8 && bsize_h >= 8);

  if (qm == OD_FLAT_QM) {
    for (i = 0; i < bsize_w * bsize_h; i++) {
      double tmp;
      tmp = x[i] - y[i];
      sum += tmp * tmp;
    }
  } else {
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
    int j;
    DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
    int mid = OD_DIST_LP_MID;
    for (i = 0; i < bsize_h; i++) {
      for (j = 0; j < bsize_w; j++) {
        e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
      }
    }
    for (i = 0; i < bsize_h; i++) {
      tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
      tmp[i * bsize_w + bsize_w - 1] =
          mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
      for (j = 1; j < bsize_w - 1; j++) {
        tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
                               e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
      }
    }
    for (j = 0; j < bsize_w; j++) {
      e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
      e_lp[(bsize_h - 1) * bsize_w + j] =
          mid * tmp[(bsize_h - 1) * bsize_w + j] +
          2 * tmp[(bsize_h - 2) * bsize_w + j];
    }
    for (i = 1; i < bsize_h - 1; i++) {
      for (j = 0; j < bsize_w; j++) {
        e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                                tmp[(i - 1) * bsize_w + j] +
                                tmp[(i + 1) * bsize_w + j];
      }
    }
Yushin Cho's avatar
Yushin Cho committed
591 592 593
    for (i = 0; i < bsize_h; i += 8) {
      for (j = 0; j < bsize_w; j += 8) {
        sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
594 595
                                   &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                   bsize_w);
Yushin Cho's avatar
Yushin Cho committed
596 597
      }
    }
598 599 600 601 602 603 604 605 606 607 608
    /* Scale according to linear regression against SSE, for 8x8 blocks. */
    if (activity_masking) {
      sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
             (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
    } else {
      sum *= qindex >= 128
                 ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
                 : qindex <= 43
                       ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                       : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
    }
Yushin Cho's avatar
Yushin Cho committed
609 610 611 612
  }
  return sum;
}

Yushin Cho's avatar
Yushin Cho committed
613 614 615
int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
                       int dst_stride, int bsw, int bsh, int qm,
                       int use_activity_masking, int qindex) {
Yushin Cho's avatar
Yushin Cho committed
616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632
  int i, j;
  int64_t d;
  DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);

  assert(qm == OD_HVS_QM);

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];

  d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
                               qindex);
  return d;
}
633
#endif  // CONFIG_DAALA_DIST
Yushin Cho's avatar
Yushin Cho committed
634

Yaowu Xu's avatar
Yaowu Xu committed
635
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
636 637
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *dst, int dst_stride,
638
                                         double *hordist, double *verdist) {
639 640
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
641
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
642

643
  const int f_index = bsize - BLOCK_16X16;
644
  if (f_index < 0) {
645 646
    const int w_shift = bw == 8 ? 1 : 2;
    const int h_shift = bh == 8 ? 1 : 2;
647
#if CONFIG_HIGHBITDEPTH
648
    if (cpi->common.use_highbitdepth) {
649 650 651 652 653
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
654 655 656
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
657 658
        }
    } else {
659
#endif  // CONFIG_HIGHBITDEPTH
660

661 662 663
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
664 665 666
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
667
#if CONFIG_HIGHBITDEPTH
668
    }
669
#endif  // CONFIG_HIGHBITDEPTH
670
  } else {
671 672 673 674 675 676 677
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[1]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[2]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[3]);
678 679 680
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

681 682 683 684 685 686 687
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[5]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[6]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[7]);
688 689 690
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

691 692 693 694 695 696 697
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[9]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[10]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[11]);
698 699 700
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

701 702 703 704 705 706 707
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[13]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[14]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[15]);
708 709
  }

710 711 712
  double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
                 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
                 esq[12] + esq[13] + esq[14] + esq[15];
713 714
  if (total > 0) {
    const double e_recip = 1.0 / total;
715 716 717 718 719 720
    hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
    hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
    hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
    verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
    verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
    verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
721 722 723 724 725 726 727
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
}

Urvang Joshi's avatar
Urvang Joshi committed
728 729 730
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                            const uint8_t *src, int src_stride,
                            const uint8_t *dst, int dst_stride) {
731 732
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
Alex Converse's avatar
Alex Converse committed
733
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
734 735
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);
736

737
  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
738
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
739
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
Alex Converse's avatar
Alex Converse committed
755 756
static void get_horver_correlation(const int16_t *diff, int stride, int w,
                                   int h, double *hcorr, double *vcorr) {
757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
784 785 786
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
787 788 789 790 791 792 793 794 795 796 797 798
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

Alex Converse's avatar
Alex Converse committed
799 800
int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
  double hcorr, vcorr;
801
  int prune_bitmask = 0;
Alex Converse's avatar
Alex Converse committed
802
  get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
803

Alex Converse's avatar
Alex Converse committed
804
  if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
805
    prune_bitmask |= 1 << IDTX_1D;
Alex Converse's avatar
Alex Converse committed
806
  else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
807 808
    prune_bitmask |= 1 << DCT_1D;

Alex Converse's avatar
Alex Converse committed
809
  if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
810
    prune_bitmask |= 1 << (IDTX_1D + 8);
Alex Converse's avatar
Alex Converse committed
811
  else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
812 813 814 815 816
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xu's avatar
Yaowu Xu committed
817
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse's avatar
Alex Converse committed
818 819
                             MACROBLOCK *x, const MACROBLOCKD *xd,
                             int adst_flipadst, int dct_idtx) {
820
  int prune = 0;
821

Alex Converse's avatar
Alex Converse committed
822 823 824
  if (adst_flipadst) {
    const struct macroblock_plane *const p = &x->plane[0];
    const struct macroblockd_plane *const pd = &xd->plane[0];
825
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
Alex Converse's avatar
Alex Converse committed
826 827 828 829 830 831 832 833 834
                              pd->dst.buf, pd->dst.stride);
  }
  if (dct_idtx) {
    av1_subtract_plane(x, bsize, 0);
    const struct macroblock_plane *const p = &x->plane[0];
    const int bw = 4 << (b_width_log2_lookup[bsize]);
    const int bh = 4 << (b_height_log2_lookup[bsize]);
    prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
  }
835 836 837

  return prune;
}
838 839
#endif  // CONFIG_EXT_TX

840
// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xu's avatar
Yaowu Xu committed
841
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse's avatar
Alex Converse committed
842 843 844
                             const MACROBLOCK *x, const MACROBLOCKD *xd) {
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
845
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
Alex Converse's avatar
Alex Converse committed
846
                          pd->dst.stride);
847 848
}

Yaowu Xu's avatar
Yaowu Xu committed
849
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
Urvang Joshi's avatar
Urvang Joshi committed
850
                          const MACROBLOCKD *const xd, int tx_set) {
851
#if CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
852
  const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
853
#else
854
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
855
#endif  // CONFIG_EXT_TX
856

857
  switch (cpi->sf.tx_type_search.prune_mode) {
858 859
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
860
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
861
        return 0;
862 863
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
864 865
#if CONFIG_EXT_TX
    case PRUNE_TWO:
866
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
867
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
868 869
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
870
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
871 872
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
873
      break;
874
#endif  // CONFIG_EXT_TX
875 876 877 878 879
  }
  assert(0);
  return 0;
}

880
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
881 882 883
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
884
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
885 886
#else
  // temporary to avoid compiler warnings
887 888 889 890
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
891
  return 1;
892
#endif  // CONFIG_EXT_TX
893 894
}

Yaowu Xu's avatar
Yaowu Xu committed
895
static void model_rd_from_sse(const AV1_COMP *const cpi,
896 897
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
898 899 900
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
901
#if CONFIG_HIGHBITDEPTH
902
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
903
#endif  // CONFIG_HIGHBITDEPTH
904
                                                    3;
Geza Lore's avatar
Geza Lore committed
905 906 907 908 909 910 911

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
912
      *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xu's avatar
Yaowu Xu committed
913
                    (16 - AV1_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
914 915 916 917
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
Yaowu Xu's avatar
Yaowu Xu committed
918 919
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
920 921 922 923 924
  }

  *dist <<= 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
925
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
926 927 928 929
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
930 931 932
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
933 934 935
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
936 937 938 939 940 941
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
942 943 944
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
945
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
946 947
    const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
Jingning Han's avatar
Jingning Han committed
948
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
949
#endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
Jingning Han's avatar
Jingning Han committed
950

Geza Lore's avatar
Geza Lore committed
951 952 953
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
954

955 956
#if CONFIG_CB4X4
    if (x->skip_chroma_rd && plane) continue;
957
#endif  // CONFIG_CB4X4
958

Geza Lore's avatar
Geza Lore committed
959 960
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
961 962
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
963

964
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
965

Geza Lore's avatar
Geza Lore committed
966
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
967

Geza Lore's avatar
Geza Lore committed
968
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
969 970 971

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
972 973
  }

Geza Lore's avatar
Geza Lore committed
974
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
975 976
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
977
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
978 979
}

Yaowu Xu's avatar
Yaowu Xu committed
980 981
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
Jingning Han's avatar
Jingning Han committed
982 983 984 985 986
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
987
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
988 989 990 991 992 993 994
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
995 996
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
Jingning Han's avatar
Jingning Han committed
997 998 999 1000 1001
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
1002
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
1003 1004 1005 1006 1007
  }

  return error;
}

1008
#if CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
1009 1010 1011
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
Jingning Han's avatar
Jingning Han committed
1012 1013 1014 1015 1016 1017 1018
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
1019
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
1020 1021 1022 1023 1024 1025 1026 1027 1028
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
1029
#endif  // CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
1030

Thomas Daede's avatar
Thomas Daede committed
1031 1032 1033 1034 1035 1036 1037 1038 1039 1040
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() return two kind of errors,
// 1) reconstruction (i.e. decoded) error and
// 2) Squared sum of transformed residue (i.e. 'coeff')
// However, if PVQ is enabled, coeff does not keep the transformed residue
// but instead a transformed original is kept.
// Hence, new parameter ref vector (i.e. transformed predicted signal)
// is required to derive the residue signal,
// i.e. coeff - ref = residue (all transformed).

1041
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078
static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
                                         const tran_low_t *dqcoeff,
                                         const tran_low_t *ref,
                                         intptr_t block_size, int64_t *ssz,
                                         int bd) {
  int64_t error;
  int64_t sqcoeff;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  // For high bit depth, throw away ssz until a 32-bit version of
  // av1_block_error_fp is written.
  int64_t ssz_trash;
  error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
  // prediction residue^2 = (orig - ref)^2
  sqcoeff = av1_block_error(coeff, ref, block_size, &ssz_trash);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;
  *ssz = sqcoeff;
  return error;
}
#else
// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
static int64_t av1_block_error2_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  const tran_low_t *ref, intptr_t block_size,
                                  int64_t *ssz) {
  int64_t error;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  error = av1_block_error_fp(coeff, dqcoeff, block_size);
  // prediction residue^2 = (orig - ref)^2
  *ssz = av1_block_error_fp(coeff, ref, block_size);
  return error;
}
1079
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1080 1081
#endif  // CONFIG_PVQ

1082
#if !CONFIG_PVQ || CONFIG_VAR_TX
1083
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
Jingning Han's avatar
Jingning Han committed
1084 1085 1086 1087
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
1088 1089 1090 1091 1092
#if !CONFIG_LV_MAP
static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                       int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                       const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                       int use_fast_coef_costing) {
Jingning Han's avatar
Jingning Han committed
1093 1094 1095 1096 1097
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
1098
  const uint16_t *band_count = &band_count_table[tx_size][1];
Jingning Han's avatar
Jingning Han committed
1099 1100
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
1101
  const int tx_size_ctx = txsize_sqr_map[tx_size];
1102 1103
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
1104
  uint8_t token_cache[MAX_TX_SQUARE];
1105
  int pt = combine_entropy_contexts(*a, *l);
Jingning Han's avatar
Jingning Han committed
1106
  int c, cost;
1107 1108
  const int16_t *scan = scan_order->scan;
  const int16_t *nb = scan_order->neighbors;
1109 1110 1111
  const int ref = is_inter_block(mbmi);
  aom_prob *blockz_probs =
      cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];
Thomas Davies's avatar
Thomas Davies committed
1112

1113
#if CONFIG_HIGHBITDEPTH
1114
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
Jingning Han's avatar
Jingning Han committed
1115
#else
1116
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
1117
#endif  // CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
1118

1119
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
1120
  // Check for consistency of tx_size with mode info
Angie Chiang's avatar
Angie Chiang committed
1121
  assert(tx_size == get_tx_size(plane, xd));
1122
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
1123
  (void)cm;
Jingning Han's avatar
Jingning Han committed
1124 1125 1126

  if (eob == 0) {
    // single eob token
1127
    cost = av1_cost_bit(blockz_probs[pt], 0);
Jingning Han's avatar
Jingning Han committed
1128
  } else {
Julia Robson's avatar
Julia Robson committed
1129 1130 1131 1132 1133 1134
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
1135
      cost = av1_get_token_cost(v, &prev_t, cat6_bits);
Thomas Davies's avatar
Thomas Davies committed
1136
      cost += (*token_costs)[!prev_t][pt][prev_t];
Julia Robson's avatar
Julia Robson committed
1137

Yaowu Xu's avatar
Yaowu Xu committed
1138
      token_cache[0] = av1_pt_energy_class[prev_t];
Julia Robson's avatar
Julia Robson committed
1139 1140 1141 1142 1143 1144 1145 1146
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
1147
        cost += av1_get_token_cost(v, &t, cat6_bits);
Thomas Davies's avatar