rdopt.c 403 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13 14
 */

#include <assert.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
16
#include "./av1_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_mem/aom_mem.h"
21 22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

24 25 26 27 28 29 30 31 32 33 34 35
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
36 37 38
#if CONFIG_LV_MAP
#include "av1/common/txb_common.h"
#endif
Yue Chen's avatar
Yue Chen committed
39 40 41
#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
#endif  // CONFIG_WARPED_MOTION
Jingning Han's avatar
Jingning Han committed
42

Jingning Han's avatar
Jingning Han committed
43
#include "av1/encoder/aq_variance.h"
44
#include "av1/encoder/av1_quantize.h"
45 46 47 48
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
49 50 51
#if CONFIG_LV_MAP
#include "av1/encoder/encodetxb.h"
#endif
52 53
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
54
#if CONFIG_PALETTE
55
#include "av1/encoder/palette.h"
56
#endif  // CONFIG_PALETTE
57 58 59
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
60
#include "av1/encoder/tokenize.h"
61 62
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
63
#endif  // CONFIG_PVQ
Yushin Cho's avatar
Yushin Cho committed
64 65
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
66
#endif  // CONFIG_PVQ || CONFIG_DAALA_DIST
67
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
68
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
69
#if USE_EXTRA_FILTER
Angie Chiang's avatar
Angie Chiang committed
70
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
71 72 73
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
74
};
75 76 77 78 79 80
#else   // USE_EXTRA_FILTER
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
};
#endif  // USE_EXTRA_FILTER
Angie Chiang's avatar
Angie Chiang committed
81
#endif  // CONFIG_DUAL_FILTER
82

83 84
#if CONFIG_EXT_REFS

85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
Jingning Han's avatar
Jingning Han committed
112

113 114
#endif  // CONFIG_EXT_REFS

115
#if CONFIG_EXT_REFS
116
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
117
#else
118
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
119
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
120

121 122
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
123

124 125 126 127 128
#if CONFIG_EXT_INTRA
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

129 130
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
131

Jingning Han's avatar
Jingning Han committed
132 133 134 135 136
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

137
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
138 139

struct rdcost_block_args {
Yaowu Xu's avatar
Yaowu Xu committed
140
  const AV1_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
141
  MACROBLOCK *x;
142 143
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
144
  RD_STATS rd_stats;
Jingning Han's avatar
Jingning Han committed
145 146 147 148 149 150 151
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
};

#define LAST_NEW_MV_INDEX 6
Yaowu Xu's avatar
Yaowu Xu committed
152
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
Emil Keyder's avatar
Emil Keyder committed
153
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
154
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
155 156 157
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
158
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
159 160
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
161

Emil Keyder's avatar
Emil Keyder committed
162
  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
163

Emil Keyder's avatar
Emil Keyder committed
164
  { NEWMV, { LAST_FRAME, NONE_FRAME } },
165
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
166 167 168
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
169
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
170 171
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
172

Emil Keyder's avatar
Emil Keyder committed
173
  { NEARMV, { LAST_FRAME, NONE_FRAME } },
174
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
175 176 177
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
178
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
179 180
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
181

Emil Keyder's avatar
Emil Keyder committed
182
  { ZEROMV, { LAST_FRAME, NONE_FRAME } },
183
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
184 185 186
  { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
  { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
  { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
187
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
188 189
  { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
  { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
190

191
// TODO(zoeliu): May need to reconsider the order on the modes to check
192

193
#if CONFIG_EXT_INTER
194
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
195
#if CONFIG_EXT_REFS
196 197
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
198
#endif  // CONFIG_EXT_REFS
199
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
200
#if CONFIG_EXT_REFS
201 202 203 204
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
205
#endif  // CONFIG_EXT_REFS
206

207
#else  // CONFIG_EXT_INTER
208

209
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
210
#if CONFIG_EXT_REFS
211 212
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
213
#endif  // CONFIG_EXT_REFS
214
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
215
#if CONFIG_EXT_REFS
216 217 218 219
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
220
#endif  // CONFIG_EXT_REFS
221
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
222

Emil Keyder's avatar
Emil Keyder committed
223
  { TM_PRED, { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
224

225
#if CONFIG_ALT_INTRA
Emil Keyder's avatar
Emil Keyder committed
226
  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
Urvang Joshi's avatar
Urvang Joshi committed
227 228 229 230
#if CONFIG_SMOOTH_HV
  { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
#endif  // CONFIG_SMOOTH_HV
231 232
#endif  // CONFIG_ALT_INTRA

233
#if CONFIG_EXT_INTER
234 235 236 237 238 239 240
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
241

242
#if CONFIG_EXT_REFS
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
258
#endif  // CONFIG_EXT_REFS
259

260 261 262 263 264 265 266
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
267 268

#if CONFIG_EXT_REFS
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
300
#endif  // CONFIG_EXT_REFS
301 302 303

#else  // CONFIG_EXT_INTER

304 305
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
306
#if CONFIG_EXT_REFS
307 308 309 310
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
311
#endif  // CONFIG_EXT_REFS
312 313
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
314 315

#if CONFIG_EXT_REFS
316 317 318 319 320 321 322 323
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
324
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
325

326
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
327
#if CONFIG_EXT_REFS
328 329
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
330
#endif  // CONFIG_EXT_REFS
331
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
332 333

#if CONFIG_EXT_REFS
334 335 336 337
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
338
#endif  // CONFIG_EXT_REFS
339

340
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
341

Emil Keyder's avatar
Emil Keyder committed
342 343 344 345 346 347 348 349
  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
350 351

#if CONFIG_EXT_INTER
352 353 354 355
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
356 357

#if CONFIG_EXT_REFS
358 359 360 361 362 363 364 365 366
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
367 368
#endif  // CONFIG_EXT_REFS

369 370 371 372
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
373

374
#if CONFIG_EXT_REFS
375 376 377 378
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
379 380
#endif  // CONFIG_EXT_REFS

381 382 383 384
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
385
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
386 387
};

388
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
389
static INLINE int write_uniform_cost(int n, int v) {
390 391
  const int l = get_unsigned_bits(n);
  const int m = (1 << l) - n;
392
  if (l == 0) return 0;
hui su's avatar
hui su committed
393
  if (v < m)
Yaowu Xu's avatar
Yaowu Xu committed
394
    return (l - 1) * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
395
  else
Yaowu Xu's avatar
Yaowu Xu committed
396
    return l * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
397
}
398
#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
399

400 401 402
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
403 404 405
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

Yushin Cho's avatar
Yushin Cho committed
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
#if CONFIG_DAALA_DIST
static int od_compute_var_4x4(od_coeff *x, int stride) {
  int sum;
  int s2;
  int i;
  sum = 0;
  s2 = 0;
  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < 4; j++) {
      int t;

      t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }
  // TODO(yushin) : Check wheter any changes are required for high bit depth.
  return (s2 - (sum * sum >> 4)) >> 4;
}

427 428 429 430 431 432 433
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)

Yushin Cho's avatar
Yushin Cho committed
434
static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
435
                                  od_coeff *y, od_coeff *e_lp, int stride) {
Yushin Cho's avatar
Yushin Cho committed
436 437 438 439 440 441 442 443 444 445 446 447
  double sum;
  int min_var;
  double mean_var;
  double var_stat;
  double activity;
  double calibration;
  int i;
  int j;
  double vardist;

  vardist = 0;
  OD_ASSERT(qm != OD_FLAT_QM);
448
  (void)qm;
Yushin Cho's avatar
Yushin Cho committed
449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479
#if 1
  min_var = INT_MAX;
  mean_var = 0;
  for (i = 0; i < 3; i++) {
    for (j = 0; j < 3; j++) {
      int varx;
      int vary;
      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
      min_var = OD_MINI(min_var, varx);
      mean_var += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* We use a different variance statistic depending on whether activity
     masking is used, since the harmonic mean appeared slghtly worse with
     masking off. The calibration constant just ensures that we preserve the
     rate compared to activity=1. */
  if (use_activity_masking) {
    calibration = 1.95;
    var_stat = 9. / mean_var;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
     activity masking constant. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
  activity = 1;
480
#endif  // 1
Yushin Cho's avatar
Yushin Cho committed
481 482 483
  sum = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++)
484
      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
Yushin Cho's avatar
Yushin Cho committed
485
  }
486 487 488
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
Yushin Cho's avatar
Yushin Cho committed
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508
  return activity * activity * (sum + vardist);
}

// Note : Inputs x and y are in a pixel domain
static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
                              od_coeff *y, int bsize_w, int bsize_h,
                              int qindex) {
  int i;
  double sum;
  sum = 0;

  assert(bsize_w >= 8 && bsize_h >= 8);

  if (qm == OD_FLAT_QM) {
    for (i = 0; i < bsize_w * bsize_h; i++) {
      double tmp;
      tmp = x[i] - y[i];
      sum += tmp * tmp;
    }
  } else {
509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
    int j;
    DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
    int mid = OD_DIST_LP_MID;
    for (i = 0; i < bsize_h; i++) {
      for (j = 0; j < bsize_w; j++) {
        e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
      }
    }
    for (i = 0; i < bsize_h; i++) {
      tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
      tmp[i * bsize_w + bsize_w - 1] =
          mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
      for (j = 1; j < bsize_w - 1; j++) {
        tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
                               e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
      }
    }
    for (j = 0; j < bsize_w; j++) {
      e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
      e_lp[(bsize_h - 1) * bsize_w + j] =
          mid * tmp[(bsize_h - 1) * bsize_w + j] +
          2 * tmp[(bsize_h - 2) * bsize_w + j];
    }
    for (i = 1; i < bsize_h - 1; i++) {
      for (j = 0; j < bsize_w; j++) {
        e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                                tmp[(i - 1) * bsize_w + j] +
                                tmp[(i + 1) * bsize_w + j];
      }
    }
Yushin Cho's avatar
Yushin Cho committed
541 542 543
    for (i = 0; i < bsize_h; i += 8) {
      for (j = 0; j < bsize_w; j += 8) {
        sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
544 545
                                   &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                   bsize_w);
Yushin Cho's avatar
Yushin Cho committed
546 547
      }
    }
548 549 550 551 552 553 554 555 556 557 558
    /* Scale according to linear regression against SSE, for 8x8 blocks. */
    if (activity_masking) {
      sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
             (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
    } else {
      sum *= qindex >= 128
                 ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
                 : qindex <= 43
                       ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                       : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
    }
Yushin Cho's avatar
Yushin Cho committed
559 560 561 562
  }
  return sum;
}

Yushin Cho's avatar
Yushin Cho committed
563 564 565
int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
                       int dst_stride, int bsw, int bsh, int qm,
                       int use_activity_masking, int qindex) {
Yushin Cho's avatar
Yushin Cho committed
566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
  int i, j;
  int64_t d;
  DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);

  assert(qm == OD_HVS_QM);

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];

  d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
                               qindex);
  return d;
}
583
#endif  // CONFIG_DAALA_DIST
Yushin Cho's avatar
Yushin Cho committed
584

Yaowu Xu's avatar
Yaowu Xu committed
585
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
586 587
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *dst, int dst_stride,
588
                                         double *hordist, double *verdist) {
589 590
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
591
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
592

593
  const int f_index = bsize - BLOCK_16X16;
594
  if (f_index < 0) {
595 596
    const int w_shift = bw == 8 ? 1 : 2;
    const int h_shift = bh == 8 ? 1 : 2;
597
#if CONFIG_HIGHBITDEPTH
598
    if (cpi->common.use_highbitdepth) {
599 600 601 602 603
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
604 605 606
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
607 608
        }
    } else {
609
#endif  // CONFIG_HIGHBITDEPTH
610

611 612 613
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
614 615 616
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
617
#if CONFIG_HIGHBITDEPTH
618
    }
619
#endif  // CONFIG_HIGHBITDEPTH
620
  } else {
621 622 623 624 625 626 627
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[1]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[2]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[3]);
628 629 630
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

631 632 633 634 635 636 637
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[5]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[6]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[7]);
638 639 640
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

641 642 643 644 645 646 647
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[9]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[10]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[11]);
648 649 650
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

651 652 653 654 655 656 657
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[13]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[14]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[15]);
658 659
  }

660 661 662
  double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
                 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
                 esq[12] + esq[13] + esq[14] + esq[15];
663 664
  if (total > 0) {
    const double e_recip = 1.0 / total;
665 666 667 668 669 670
    hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
    hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
    hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
    verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
    verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
    verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
671 672 673 674 675 676 677
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
}

Urvang Joshi's avatar
Urvang Joshi committed
678 679 680
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                            const uint8_t *src, int src_stride,
                            const uint8_t *dst, int dst_stride) {
681 682
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
Alex Converse's avatar
Alex Converse committed
683
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
684 685
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);
686

687
  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
688
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
689
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
690 691 692 693 694 695 696 697 698 699 700 701 702 703 704
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
Alex Converse's avatar
Alex Converse committed
705 706
static void get_horver_correlation(const int16_t *diff, int stride, int w,
                                   int h, double *hcorr, double *vcorr) {
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
734 735 736
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
737 738 739 740 741 742 743 744 745 746 747 748
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

Alex Converse's avatar
Alex Converse committed
749 750
int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
  double hcorr, vcorr;
751
  int prune_bitmask = 0;
Alex Converse's avatar
Alex Converse committed
752
  get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
753

Alex Converse's avatar
Alex Converse committed
754
  if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
755
    prune_bitmask |= 1 << IDTX_1D;
Alex Converse's avatar
Alex Converse committed
756
  else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
757 758
    prune_bitmask |= 1 << DCT_1D;

Alex Converse's avatar
Alex Converse committed
759
  if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
760
    prune_bitmask |= 1 << (IDTX_1D + 8);
Alex Converse's avatar
Alex Converse committed
761
  else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
762 763 764 765 766
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xu's avatar
Yaowu Xu committed
767
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse's avatar
Alex Converse committed
768 769
                             MACROBLOCK *x, const MACROBLOCKD *xd,
                             int adst_flipadst, int dct_idtx) {
770
  int prune = 0;
771

Alex Converse's avatar
Alex Converse committed
772 773 774
  if (adst_flipadst) {
    const struct macroblock_plane *const p = &x->plane[0];
    const struct macroblockd_plane *const pd = &xd->plane[0];
775
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
Alex Converse's avatar
Alex Converse committed
776 777 778 779 780 781 782 783 784
                              pd->dst.buf, pd->dst.stride);
  }
  if (dct_idtx) {
    av1_subtract_plane(x, bsize, 0);
    const struct macroblock_plane *const p = &x->plane[0];
    const int bw = 4 << (b_width_log2_lookup[bsize]);
    const int bh = 4 << (b_height_log2_lookup[bsize]);
    prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
  }
785 786 787

  return prune;
}
788 789
#endif  // CONFIG_EXT_TX

790
// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xu's avatar
Yaowu Xu committed
791
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse's avatar
Alex Converse committed
792 793 794
                             const MACROBLOCK *x, const MACROBLOCKD *xd) {
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
795
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
Alex Converse's avatar
Alex Converse committed
796
                          pd->dst.stride);
797 798
}

Yaowu Xu's avatar
Yaowu Xu committed
799
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
Urvang Joshi's avatar
Urvang Joshi committed
800
                          const MACROBLOCKD *const xd, int tx_set) {
801
#if CONFIG_EXT_TX
Yaowu Xu's avatar
Yaowu Xu committed
802
  const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
803
#else
804
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
805
#endif  // CONFIG_EXT_TX
806

807
  switch (cpi->sf.tx_type_search.prune_mode) {
808 809
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
810
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
811
        return 0;
812 813
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
814 815
#if CONFIG_EXT_TX
    case PRUNE_TWO:
816
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
817
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
818 819
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
820
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
821 822
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
823
      break;
824
#endif  // CONFIG_EXT_TX
825 826 827 828 829
  }
  assert(0);
  return 0;
}

830
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
831 832 833
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
834
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
835 836
#else
  // temporary to avoid compiler warnings
837 838 839 840
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
841
  return 1;
842
#endif  // CONFIG_EXT_TX
843 844
}

Yaowu Xu's avatar
Yaowu Xu committed
845
static void model_rd_from_sse(const AV1_COMP *const cpi,
846 847
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
848 849 850
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
851
#if CONFIG_HIGHBITDEPTH
852
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
853
#endif  // CONFIG_HIGHBITDEPTH
854
                                                    3;
Geza Lore's avatar
Geza Lore committed
855 856 857 858 859 860 861

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
862
      *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xu's avatar
Yaowu Xu committed
863
                    (16 - AV1_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
864 865 866 867
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
Yaowu Xu's avatar
Yaowu Xu committed
868 869
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
870 871 872 873 874
  }

  *dist <<= 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
875
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
876 877 878 879
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
880 881 882
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
883 884 885
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
886 887 888 889 890 891
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
892 893 894
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
895
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
896 897
    const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
Jingning Han's avatar
Jingning Han committed
898
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
899
#endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
Jingning Han's avatar
Jingning Han committed
900

Geza Lore's avatar
Geza Lore committed
901 902 903
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
904

905 906
#if CONFIG_CB4X4
    if (x->skip_chroma_rd && plane) continue;
907
#endif  // CONFIG_CB4X4
908

Geza Lore's avatar
Geza Lore committed
909 910
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
911 912
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
913

914
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
915

Geza Lore's avatar
Geza Lore committed
916
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
917

Geza Lore's avatar
Geza Lore committed
918
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
919 920 921

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
922 923
  }

Geza Lore's avatar
Geza Lore committed
924
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
925 926
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
927
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
928 929
}

Yaowu Xu's avatar
Yaowu Xu committed
930 931
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
Jingning Han's avatar
Jingning Han committed
932 933 934 935 936
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
937
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
938 939 940 941 942 943 944
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
945 946
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
Jingning Han's avatar
Jingning Han committed
947 948 949 950 951
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
952
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
953 954 955 956 957
  }

  return error;
}

958
#if CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
959 960 961
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
Jingning Han's avatar
Jingning Han committed
962 963 964 965 966 967 968
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
969
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
970 971 972 973 974 975 976 977 978
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
979
#endif  // CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
980

Thomas Daede's avatar
Thomas Daede committed
981 982 983 984 985 986 987 988 989 990
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() return two kind of errors,
// 1) reconstruction (i.e. decoded) error and
// 2) Squared sum of transformed residue (i.e. 'coeff')
// However, if PVQ is enabled, coeff does not keep the transformed residue
// but instead a transformed original is kept.
// Hence, new parameter ref vector (i.e. transformed predicted signal)
// is required to derive the residue signal,
// i.e. coeff - ref = residue (all transformed).

991
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
                                         const tran_low_t *dqcoeff,
                                         const tran_low_t *ref,
                                         intptr_t block_size, int64_t *ssz,
                                         int bd) {
  int64_t error;
  int64_t sqcoeff;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  // For high bit depth, throw away ssz until a 32-bit version of
  // av1_block_error_fp is written.
  int64_t ssz_trash;
  error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
  // prediction residue^2 = (orig - ref)^2
  sqcoeff = av1_block_error(coeff, ref, block_size, &ssz_trash);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;
  *ssz = sqcoeff;
  return error;
}
#else
// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
static int64_t av1_block_error2_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  const tran_low_t *ref, intptr_t block_size,
                                  int64_t *ssz) {
  int64_t error;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  error = av1_block_error_fp(coeff, dqcoeff, block_size);
  // prediction residue^2 = (orig - ref)^2
  *ssz = av1_block_error_fp(coeff, ref, block_size);
  return error;
}
1029
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1030 1031
#endif  // CONFIG_PVQ

1032
#if !CONFIG_PVQ || CONFIG_VAR_TX
1033
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
Jingning Han's avatar
Jingning Han committed
1034 1035 1036 1037
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
1038 1039 1040 1041 1042
#if !CONFIG_LV_MAP
static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                       int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                       const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                       int use_fast_coef_costing) {
Jingning Han's avatar
Jingning Han committed
1043 1044 1045 1046 1047
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
1048
  const uint16_t *band_count = &band_count_table[tx_size][1];
Jingning Han's avatar
Jingning Han committed
1049 1050
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
1051
  const int tx_size_ctx = txsize_sqr_map[tx_size];
1052 1053
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
1054
  uint8_t token_cache[MAX_TX_SQUARE];
1055
  int pt = combine_entropy_contexts(*a, *l);
Jingning Han's avatar
Jingning Han committed
1056
  int c, cost;
1057 1058
  const int16_t *scan = scan_order->scan;
  const int16_t *nb = scan_order->neighbors;
1059 1060 1061
  const int ref = is_inter_block(mbmi);
  aom_prob *blockz_probs =
      cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];
Thomas Davies's avatar
Thomas Davies committed
1062

1063
#if CONFIG_HIGHBITDEPTH
1064
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
Jingning Han's avatar
Jingning Han committed
1065
#else
1066
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
1067
#endif  // CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
1068

1069
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
1070
  // Check for consistency of tx_size with mode info
Angie Chiang's avatar
Angie Chiang committed
1071
  assert(tx_size == get_tx_size(plane, xd));
1072
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
1073
  (void)cm;
Jingning Han's avatar
Jingning Han committed
1074 1075 1076

  if (eob == 0) {
    // single eob token
1077
    cost = av1_cost_bit(blockz_probs[pt], 0);
Jingning Han's avatar
Jingning Han committed
1078
  } else {
Julia Robson's avatar
Julia Robson committed
1079 1080 1081 1082 1083 1084
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
1085
      cost = av1_get_token_cost(v, &prev_t, cat6_bits);
Thomas Davies's avatar
Thomas Davies committed
1086
      cost += (*token_costs)[!prev_t][pt][prev_t];
Julia Robson's avatar
Julia Robson committed
1087

Yaowu Xu's avatar
Yaowu Xu committed
1088
      token_cache[0] = av1_pt_energy_class[prev_t];
Julia Robson's avatar
Julia Robson committed
1089 1090 1091 1092 1093 1094 1095 1096
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
1097
        cost += av1_get_token_cost(v, &t, cat6_bits);
Thomas Davies's avatar
Thomas Davies committed
1098
        cost += (*token_costs)[!t][!prev_t][t];
Julia Robson's avatar
Julia Robson committed
1099 1100 1101 1102 1103
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
Jingning Han's avatar
Jingning Han committed
1104 1105
      }

Julia Robson's avatar
Julia Robson committed
1106
      // eob token
1107
      cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
Julia Robson's avatar
Julia Robson committed
1108 1109 1110 1111 1112 1113 1114

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
1115
      cost = av1_get_token_cost(v, &tok, cat6_bits);
Thomas Davies's avatar
Thomas Davies committed
1116
      cost += (*token_costs)[!tok][pt][tok];