rdopt.c 468 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13 14
 */

#include <assert.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
16
#include "./av1_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_mem/aom_mem.h"
21 22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

24 25 26 27 28 29 30 31 32 33 34 35
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
36 37 38
#if CONFIG_LV_MAP
#include "av1/common/txb_common.h"
#endif
Yue Chen's avatar
Yue Chen committed
39 40 41
#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
#endif  // CONFIG_WARPED_MOTION
Jingning Han's avatar
Jingning Han committed
42

Jingning Han's avatar
Jingning Han committed
43
#include "av1/encoder/aq_variance.h"
44
#include "av1/encoder/av1_quantize.h"
45 46 47 48
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
49 50 51
#if CONFIG_LV_MAP
#include "av1/encoder/encodetxb.h"
#endif
52 53
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
54
#if CONFIG_PALETTE
55
#include "av1/encoder/palette.h"
56
#endif  // CONFIG_PALETTE
57 58 59
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
60
#include "av1/encoder/tokenize.h"
61 62
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
63
#endif  // CONFIG_PVQ
Yushin Cho's avatar
Yushin Cho committed
64 65
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
66
#endif  // CONFIG_PVQ || CONFIG_DAALA_DIST
67
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
68 69
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
70 71 72
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
73
};
Angie Chiang's avatar
Angie Chiang committed
74
#endif  // CONFIG_DUAL_FILTER
75

76 77
#if CONFIG_EXT_REFS

78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
Jingning Han's avatar
Jingning Han committed
105

106 107
#endif  // CONFIG_EXT_REFS

108
#if CONFIG_EXT_REFS
109
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
110
#else
111
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
112
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
113

114 115
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
116

117 118 119 120 121
#if CONFIG_EXT_INTRA
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

122 123
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
124

Jingning Han's avatar
Jingning Han committed
125 126 127 128 129
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

130
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
131 132

struct rdcost_block_args {
Yaowu Xu's avatar
Yaowu Xu committed
133
  const AV1_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
134
  MACROBLOCK *x;
135 136
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
137
  RD_STATS rd_stats;
Jingning Han's avatar
Jingning Han committed
138 139 140 141 142 143 144
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
};

#define LAST_NEW_MV_INDEX 6
Yaowu Xu's avatar
Yaowu Xu committed
145
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
Emil Keyder's avatar
Emil Keyder committed
146
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
147
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
148 149 150
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
151
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
152 153
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
154

Emil Keyder's avatar
Emil Keyder committed
155
  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
156

Emil Keyder's avatar
Emil Keyder committed
157
  { NEWMV, { LAST_FRAME, NONE_FRAME } },
158
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
159 160 161
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
162
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
163 164
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
165

Emil Keyder's avatar
Emil Keyder committed
166
  { NEARMV, { LAST_FRAME, NONE_FRAME } },
167
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
168 169 170
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
171
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
172 173
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
174

Emil Keyder's avatar
Emil Keyder committed
175
  { ZEROMV, { LAST_FRAME, NONE_FRAME } },
176
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
177 178 179
  { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
  { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
  { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
180
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
181 182
  { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
  { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
183

184
// TODO(zoeliu): May need to reconsider the order on the modes to check
185

186
#if CONFIG_EXT_INTER
187
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
188
#if CONFIG_EXT_REFS
189 190
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
191
#endif  // CONFIG_EXT_REFS
192
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
193
#if CONFIG_EXT_REFS
194 195 196 197
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
198
#endif  // CONFIG_EXT_REFS
199

200
#else  // CONFIG_EXT_INTER
201

202
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
203
#if CONFIG_EXT_REFS
204 205
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
206
#endif  // CONFIG_EXT_REFS
207
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
208
#if CONFIG_EXT_REFS
209 210 211 212
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
213
#endif  // CONFIG_EXT_REFS
214
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
215

Emil Keyder's avatar
Emil Keyder committed
216
  { TM_PRED, { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
217

218
#if CONFIG_ALT_INTRA
Emil Keyder's avatar
Emil Keyder committed
219
  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
220 221
#endif  // CONFIG_ALT_INTRA

222
#if CONFIG_EXT_INTER
223 224 225 226 227 228 229 230 231
  { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
232

233
#if CONFIG_EXT_REFS
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
  { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
253
#endif  // CONFIG_EXT_REFS
254

255 256 257 258 259 260 261 262 263
  { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
264 265

#if CONFIG_EXT_REFS
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
  { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
305
#endif  // CONFIG_EXT_REFS
306 307 308

#else  // CONFIG_EXT_INTER

309 310
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
311
#if CONFIG_EXT_REFS
312 313 314 315
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
316
#endif  // CONFIG_EXT_REFS
317 318
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
319 320

#if CONFIG_EXT_REFS
321 322 323 324 325 326 327 328
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
329
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
330

331
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
332
#if CONFIG_EXT_REFS
333 334
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
335
#endif  // CONFIG_EXT_REFS
336
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
337 338

#if CONFIG_EXT_REFS
339 340 341 342
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
343
#endif  // CONFIG_EXT_REFS
344

345
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
346

Emil Keyder's avatar
Emil Keyder committed
347 348 349 350 351 352 353 354
  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
355 356

#if CONFIG_EXT_INTER
357 358 359 360
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
361 362

#if CONFIG_EXT_REFS
363 364 365 366 367 368 369 370 371
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
372 373
#endif  // CONFIG_EXT_REFS

374 375 376 377
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
378

379
#if CONFIG_EXT_REFS
380 381 382 383
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
384 385
#endif  // CONFIG_EXT_REFS

386 387 388 389
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
390
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
391 392
};

Yaowu Xu's avatar
Yaowu Xu committed
393
static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
Emil Keyder's avatar
Emil Keyder committed
394
  { { LAST_FRAME, NONE_FRAME } },
395
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
396 397
  { { LAST2_FRAME, NONE_FRAME } },    { { LAST3_FRAME, NONE_FRAME } },
  { { BWDREF_FRAME, NONE_FRAME } },
398
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
399
  { { GOLDEN_FRAME, NONE_FRAME } },   { { ALTREF_FRAME, NONE_FRAME } },
400

401
  { { LAST_FRAME, ALTREF_FRAME } },
402
#if CONFIG_EXT_REFS
403
  { { LAST2_FRAME, ALTREF_FRAME } },  { { LAST3_FRAME, ALTREF_FRAME } },
404
#endif  // CONFIG_EXT_REFS
405
  { { GOLDEN_FRAME, ALTREF_FRAME } },
406 407

#if CONFIG_EXT_REFS
408 409
  { { LAST_FRAME, BWDREF_FRAME } },   { { LAST2_FRAME, BWDREF_FRAME } },
  { { LAST3_FRAME, BWDREF_FRAME } },  { { GOLDEN_FRAME, BWDREF_FRAME } },
410
#endif  // CONFIG_EXT_REFS
411

Emil Keyder's avatar
Emil Keyder committed
412
  { { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
413 414
};

415
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
416
static INLINE int write_uniform_cost(int n, int v) {
417 418
  const int l = get_unsigned_bits(n);
  const int m = (1 << l) - n;
419
  if (l == 0) return 0;
hui su's avatar
hui su committed
420
  if (v < m)
Yaowu Xu's avatar
Yaowu Xu committed
421
    return (l - 1) * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
422
  else
Yaowu Xu's avatar
Yaowu Xu committed
423
    return l * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
424
}
425
#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
426

427 428 429
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
430 431 432 433
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
434
  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
435
#if CONFIG_EXT_TX
436 437
  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
438 439 440 441
#endif  // CONFIG_EXT_TX
};

static const TX_TYPE_1D htx_tab[TX_TYPES] = {
442
  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
443
#if CONFIG_EXT_TX
444 445
  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
446 447
#endif  // CONFIG_EXT_TX
};
448

Yushin Cho's avatar
Yushin Cho committed
449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469
#if CONFIG_DAALA_DIST
static int od_compute_var_4x4(od_coeff *x, int stride) {
  int sum;
  int s2;
  int i;
  sum = 0;
  s2 = 0;
  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < 4; j++) {
      int t;

      t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }
  // TODO(yushin) : Check wheter any changes are required for high bit depth.
  return (s2 - (sum * sum >> 4)) >> 4;
}

470 471 472 473 474 475 476
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)

Yushin Cho's avatar
Yushin Cho committed
477
static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
478
                                  od_coeff *y, od_coeff *e_lp, int stride) {
Yushin Cho's avatar
Yushin Cho committed
479 480 481 482 483 484 485 486 487 488 489 490
  double sum;
  int min_var;
  double mean_var;
  double var_stat;
  double activity;
  double calibration;
  int i;
  int j;
  double vardist;

  vardist = 0;
  OD_ASSERT(qm != OD_FLAT_QM);
491
  (void)qm;
Yushin Cho's avatar
Yushin Cho committed
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
#if 1
  min_var = INT_MAX;
  mean_var = 0;
  for (i = 0; i < 3; i++) {
    for (j = 0; j < 3; j++) {
      int varx;
      int vary;
      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
      min_var = OD_MINI(min_var, varx);
      mean_var += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* We use a different variance statistic depending on whether activity
     masking is used, since the harmonic mean appeared slghtly worse with
     masking off. The calibration constant just ensures that we preserve the
     rate compared to activity=1. */
  if (use_activity_masking) {
    calibration = 1.95;
    var_stat = 9. / mean_var;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
     activity masking constant. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
  activity = 1;
523
#endif  // 1
Yushin Cho's avatar
Yushin Cho committed
524 525 526
  sum = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++)
527
      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
Yushin Cho's avatar
Yushin Cho committed
528
  }
529 530 531
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
Yushin Cho's avatar
Yushin Cho committed
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551
  return activity * activity * (sum + vardist);
}

// Note : Inputs x and y are in a pixel domain
static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
                              od_coeff *y, int bsize_w, int bsize_h,
                              int qindex) {
  int i;
  double sum;
  sum = 0;

  assert(bsize_w >= 8 && bsize_h >= 8);

  if (qm == OD_FLAT_QM) {
    for (i = 0; i < bsize_w * bsize_h; i++) {
      double tmp;
      tmp = x[i] - y[i];
      sum += tmp * tmp;
    }
  } else {
552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583
    int j;
    DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
    int mid = OD_DIST_LP_MID;
    for (i = 0; i < bsize_h; i++) {
      for (j = 0; j < bsize_w; j++) {
        e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
      }
    }
    for (i = 0; i < bsize_h; i++) {
      tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
      tmp[i * bsize_w + bsize_w - 1] =
          mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
      for (j = 1; j < bsize_w - 1; j++) {
        tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
                               e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
      }
    }
    for (j = 0; j < bsize_w; j++) {
      e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
      e_lp[(bsize_h - 1) * bsize_w + j] =
          mid * tmp[(bsize_h - 1) * bsize_w + j] +
          2 * tmp[(bsize_h - 2) * bsize_w + j];
    }
    for (i = 1; i < bsize_h - 1; i++) {
      for (j = 0; j < bsize_w; j++) {
        e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                                tmp[(i - 1) * bsize_w + j] +
                                tmp[(i + 1) * bsize_w + j];
      }
    }
Yushin Cho's avatar
Yushin Cho committed
584 585 586
    for (i = 0; i < bsize_h; i += 8) {
      for (j = 0; j < bsize_w; j += 8) {
        sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
587 588
                                   &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                   bsize_w);
Yushin Cho's avatar
Yushin Cho committed
589 590
      }
    }
591 592 593 594 595 596 597 598 599 600 601
    /* Scale according to linear regression against SSE, for 8x8 blocks. */
    if (activity_masking) {
      sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
             (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
    } else {
      sum *= qindex >= 128
                 ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
                 : qindex <= 43
                       ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                       : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
    }
Yushin Cho's avatar
Yushin Cho committed
602 603 604 605 606
  }
  return sum;
}

static int64_t av1_daala_dist(const uint8_t *src, int src_stride,
607 608 609
                              const uint8_t *dst, int dst_stride, int bsw,
                              int bsh, int qm, int use_activity_masking,
                              int qindex) {
Yushin Cho's avatar
Yushin Cho committed
610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626
  int i, j;
  int64_t d;
  DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);

  assert(qm == OD_HVS_QM);

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];

  d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
                               qindex);
  return d;
}
627
#endif  // CONFIG_DAALA_DIST
Yushin Cho's avatar
Yushin Cho committed
628

Yaowu Xu's avatar
Yaowu Xu committed
629
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
630 631
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *dst, int dst_stride,
632
                                         double *hordist, double *verdist) {
633 634
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
635
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
636

637
  const int f_index = bsize - BLOCK_16X16;
638
  if (f_index < 0) {
639 640
    const int w_shift = bw == 8 ? 1 : 2;
    const int h_shift = bh == 8 ? 1 : 2;
641
#if CONFIG_HIGHBITDEPTH
642
    if (cpi->common.use_highbitdepth) {
643 644 645 646 647
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
648 649 650
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
651 652
        }
    } else {
653
#endif  // CONFIG_HIGHBITDEPTH
654

655 656 657
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
658 659 660
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
661
#if CONFIG_HIGHBITDEPTH
662
    }
663
#endif  // CONFIG_HIGHBITDEPTH
664
  } else {
665 666 667 668 669 670 671
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[1]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[2]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[3]);
672 673 674
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

675 676 677 678 679 680 681
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[5]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[6]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[7]);
682 683 684
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

685 686 687 688 689 690 691
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[9]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[10]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[11]);
692 693 694
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

695 696 697 698 699 700 701
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[13]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[14]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[15]);
702 703
  }

704 705 706
  double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
                 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
                 esq[12] + esq[13] + esq[14] + esq[15];
707 708
  if (total > 0) {
    const double e_recip = 1.0 / total;
709 710 711 712 713 714
    hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
    hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
    hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
    verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
    verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
    verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
715 716 717 718 719 720 721
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
722
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize, uint8_t *src,
Alex Converse's avatar
Alex Converse committed
723
                            int src_stride, uint8_t *dst, int dst_stride) {
724 725
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
Alex Converse's avatar
Alex Converse committed
726
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
727 728
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);
729

730
  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
731
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
732
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
733 734 735 736 737 738 739 740 741 742 743 744 745 746 747
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
Alex Converse's avatar
Alex Converse committed
748 749
static void get_horver_correlation(const int16_t *diff, int stride, int w,
                                   int h, double *hcorr, double *vcorr) {
750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
777 778 779
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
780 781 782 783 784 785 786 787 788 789 790 791
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

Alex Converse's avatar
Alex Converse committed
792 793
int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
  double hcorr, vcorr;
794
  int prune_bitmask = 0;
Alex Converse's avatar
Alex Converse committed
795
  get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
796

Alex Converse's avatar
Alex Converse committed
797
  if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
798
    prune_bitmask |= 1 << IDTX_1D;
Alex Converse's avatar
Alex Converse committed
799
  else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
800 801
    prune_bitmask |= 1 << DCT_1D;

Alex Converse's avatar
Alex Converse committed
802
  if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
803
    prune_bitmask |= 1 << (IDTX_1D + 8);
Alex Converse's avatar
Alex Converse committed
804
  else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
805 806 807 808 809
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xu's avatar
Yaowu Xu committed
810
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse's avatar
Alex Converse committed
811 812
                             MACROBLOCK *x, const MACROBLOCKD *xd,
                             int adst_flipadst, int dct_idtx) {
813
  int prune = 0;
814

Alex Converse's avatar
Alex Converse committed
815 816 817
  if (adst_flipadst) {
    const struct macroblock_plane *const p = &x->plane[0];
    const struct macroblockd_plane *const pd = &xd->plane[0];
818
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
Alex Converse's avatar
Alex Converse committed
819 820 821 822 823 824 825 826 827
                              pd->dst.buf, pd->dst.stride);
  }
  if (dct_idtx) {
    av1_subtract_plane(x, bsize, 0);
    const struct macroblock_plane *const p = &x->plane[0];
    const int bw = 4 << (b_width_log2_lookup[bsize]);
    const int bh = 4 << (b_height_log2_lookup[bsize]);
    prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
  }
828 829 830

  return prune;
}
831 832
#endif  // CONFIG_EXT_TX

833
// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xu's avatar
Yaowu Xu committed
834
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse's avatar
Alex Converse committed
835 836 837
                             const MACROBLOCK *x, const MACROBLOCKD *xd) {
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
838
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
Alex Converse's avatar
Alex Converse committed
839
                          pd->dst.stride);
840 841
}

Yaowu Xu's avatar
Yaowu Xu committed
842
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
843 844 845 846
                          MACROBLOCKD *xd, int tx_set) {
#if CONFIG_EXT_TX
  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
#else
847
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
848
#endif  // CONFIG_EXT_TX
849

850
  switch (cpi->sf.tx_type_search.prune_mode) {
851 852
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
853
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
854
        return 0;
855 856
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
857 858
#if CONFIG_EXT_TX
    case PRUNE_TWO:
859
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
860
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
861 862
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
863
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
864 865
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
866
      break;
867
#endif  // CONFIG_EXT_TX
868 869 870 871 872
  }
  assert(0);
  return 0;
}

873
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
874 875 876
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
877
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
878 879
#else
  // temporary to avoid compiler warnings
880 881 882 883
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
884
  return 1;
885
#endif  // CONFIG_EXT_TX
886 887
}

Yaowu Xu's avatar
Yaowu Xu committed
888
static void model_rd_from_sse(const AV1_COMP *const cpi,
889 890
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
891 892 893
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
894
#if CONFIG_HIGHBITDEPTH
895
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
896
#endif  // CONFIG_HIGHBITDEPTH
897
                                                    3;
Geza Lore's avatar
Geza Lore committed
898 899 900 901 902 903 904

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
905
      *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xu's avatar
Yaowu Xu committed
906
                    (16 - AV1_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
907 908 909 910
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
Yaowu Xu's avatar
Yaowu Xu committed
911 912
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
913 914 915 916 917
  }

  *dist <<= 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
918
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
919 920 921 922
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
923 924 925
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
926 927 928
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
929 930 931 932 933 934
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
935 936 937
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
938
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
939 940
    const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
Jingning Han's avatar
Jingning Han committed
941
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
942
#endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
Jingning Han's avatar
Jingning Han committed
943

Geza Lore's avatar
Geza Lore committed
944 945 946
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
947

948 949
#if CONFIG_CB4X4
    if (x->skip_chroma_rd && plane) continue;
950
#endif  // CONFIG_CB4X4
951

Geza Lore's avatar
Geza Lore committed
952 953
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
954 955
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
956

957
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
958

Geza Lore's avatar
Geza Lore committed
959
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
960

Geza Lore's avatar
Geza Lore committed
961
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
962 963 964

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
965 966
  }

Geza Lore's avatar
Geza Lore committed
967
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
968 969
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
970
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
971 972
}

Yaowu Xu's avatar
Yaowu Xu committed
973 974
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
Jingning Han's avatar
Jingning Han committed
975 976 977 978 979
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
980
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
981 982 983 984 985 986 987
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
988 989
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
Jingning Han's avatar
Jingning Han committed
990 991 992 993 994
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
995
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
996 997 998 999 1000
  }

  return error;
}

1001
#if CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
1002 1003 1004
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
Jingning Han's avatar
Jingning Han committed
1005 1006 1007 1008 1009 1010 1011
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
1012
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
1013 1014 1015 1016 1017 1018 1019 1020 1021
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
1022
#endif  // CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
1023

Thomas Daede's avatar
Thomas Daede committed
1024 1025 1026 1027 1028 1029 1030 1031 1032 1033
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() return two kind of errors,
// 1) reconstruction (i.e. decoded) error and
// 2) Squared sum of transformed residue (i.e. 'coeff')
// However, if PVQ is enabled, coeff does not keep the transformed residue
// but instead a transformed original is kept.
// Hence, new parameter ref vector (i.e. transformed predicted signal)
// is required to derive the residue signal,
// i.e. coeff - ref = residue (all transformed).

1034
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071
static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
                                         const tran_low_t *dqcoeff,
                                         const tran_low_t *ref,
                                         intptr_t block_size, int64_t *ssz,
                                         int bd) {
  int64_t error;
  int64_t sqcoeff;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  // For high bit depth, throw away ssz until a 32-bit version of
  // av1_block_error_fp is written.
  int64_t ssz_trash;
  error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
  // prediction residue^2 = (orig - ref)^2
  sqcoeff = av1_block_error(coeff, ref, block_size, &ssz_trash);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;
  *ssz = sqcoeff;
  return error;
}
#else
// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
static int64_t av1_block_error2_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  const tran_low_t *ref, intptr_t block_size,
                                  int64_t *ssz) {
  int64_t error;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  error = av1_block_error_fp(coeff, dqcoeff, block_size);
  // prediction residue^2 = (orig - ref)^2
  *ssz = av1_block_error_fp(coeff, ref, block_size);
  return error;
}
1072
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1073 1074
#endif  // CONFIG_PVQ

1075
#if !CONFIG_PVQ || CONFIG_VAR_TX
1076
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
Jingning Han's avatar
Jingning Han committed
1077 1078 1079 1080
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
1081 1082 1083 1084 1085
#if !CONFIG_LV_MAP
static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                       int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                       const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                       int use_fast_coef_costing) {
Jingning Han's avatar
Jingning Han committed
1086 1087 1088 1089 1090
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
1091
  const uint16_t *band_count = &band_count_table[tx_size][1];
Jingning Han's avatar
Jingning Han committed
1092 1093
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
1094
  const int tx_size_ctx = txsize_sqr_map[tx_size];
1095 1096
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
1097
  uint8_t token_cache[MAX_TX_SQUARE];
1098
  int pt = combine_entropy_contexts(*a, *l);
Jingning Han's avatar
Jingning Han committed
1099
  int c, cost;
1100 1101
  const int16_t *scan = scan_order->scan;
  const int16_t *nb = scan_order->neighbors;
1102 1103 1104 1105
#if CONFIG_NEW_TOKENSET
  const int ref = is_inter_block(mbmi);
  aom_prob *blockz_probs =
      cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];
Thomas Davies's avatar
Thomas Davies committed
1106

1107
#endif  // CONFIG_NEW_TOKENSET
1108

1109
#if CONFIG_HIGHBITDEPTH
1110
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
Jingning Han's avatar
Jingning Han committed
1111
#else
1112
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
1113
#endif  // CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
1114

1115
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
1116
  // Check for consistency of tx_size with mode info
Angie Chiang's avatar
Angie Chiang committed
1117
  assert(tx_size == get_tx_size(plane, xd));
1118
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
1119
  (void)cm;
Jingning Han's avatar
Jingning Han committed
1120 1121

  if (eob == 0) {
1122
#if CONFIG_NEW_TOKENSET
Jingning Han's avatar
Jingning Han committed
1123
    // single eob token
1124 1125
    cost = av1_cost_bit(blockz_probs[pt], 0);
#else
Jingning Han's avatar
Jingning Han committed
1126
    cost = token_costs[0][0][pt][EOB_TOKEN];
1127
#endif  // CONFIG_NEW_TOKENSET
Jingning Han's avatar
Jingning Han committed
1128
  } else {
Julia Robson's avatar
Julia Robson committed
1129 1130 1131 1132 1133 1134
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
1135
      cost = av1_get_token_cost(v, &prev_t, cat6_bits);
Thomas Davies's avatar
Thomas Davies committed
1136 1137 1138
#if CONFIG_NEW_TOKENSET
      cost += (*token_costs)[!prev_t][pt][prev_t];
#else
Julia Robson's avatar
Julia Robson committed
1139
      cost += (*token_costs)[0][pt][prev_t];
Thomas Davies's avatar
Thomas Davies committed
1140
#endif
Julia Robson's avatar
Julia Robson committed
1141

Yaowu Xu's avatar
Yaowu Xu committed
1142
      token_cache[0] = av1_pt_energy_class[prev_t];
Julia Robson's avatar
Julia Robson committed
1143 1144 1145 1146 1147 1148 1149 1150
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
1151
        cost += av1_get_token_cost(v, &t, cat6_bits);
Thomas Davies's avatar
Thomas Davies committed
1152 1153 1154
#if CONFIG_NEW_TOKENSET
        cost += (*token_costs)[!t][!prev_t][t];
#else
Julia Robson's avatar
Julia Robson committed
1155
        cost += (*token_costs)[!prev_t][!prev_t][t];
Thomas Davies's avatar
Thomas Davies committed
1156
#endif
Julia Robson's avatar
Julia Robson committed
1157 1158 1159 1160 1161
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
Jingning Han's avatar
Jingning Han committed
1162 1163
      }

Julia Robson's avatar
Julia Robson committed
1164
      // eob token
Thomas Davies's avatar
Thomas Davies committed
1165 1166
      if (band_left || CONFIG_NEW_TOKENSET)
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
Julia Robson's avatar
Julia Robson committed
1167 1168 1169 1170 1171 1172 1173

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
Thomas Davies's avatar
Thomas Davies committed
1174
#if !CONFIG_NEW_TOKENSET
1175
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
Thomas Davies's avatar
Thomas Davies committed
1176
#endif
1177
      cost = av1_get_token_cost(v, &tok, cat6_bits);
Thomas Davies's avatar
Thomas Davies committed
1178 1179 1180
#if CONFIG_NEW_TOKENSET
      cost += (*token_costs)[!tok][pt][tok];
#else
Julia Robson's avatar
Julia Robson committed
1181
      cost += (*token_costs)[0][pt][tok];
Thomas Davies's avatar
Thomas Davies committed
1182
#endif
Julia Robson's avatar
Julia Robson committed
1183

Yaowu Xu's avatar
Yaowu Xu committed
1184
      token_cache[0] = av1_pt_energy_class[tok];
Julia Robson's avatar
Julia Robson committed
1185 1186
      ++token_costs;

Thomas Davies's avatar
Thomas Davies committed
1187
#if !CONFIG_NEW_TOKENSET
Julia Robson's avatar
Julia Robson committed
1188
      tok_cost_ptr = &((*token_costs)[!tok]);
Thomas Davies's avatar
Thomas Davies committed
1189
#endif
Julia Robson's avatar
Julia Robson committed
1190 1191 1192 1193 1194 1195

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
1196
        cost += av1_get_token_cost(v, &tok, cat6_bits);
Julia Robson's avatar
Julia Robson committed
1197
        pt = get_coef_context(nb, token_cache, c);
Thomas Davies's avatar
Thomas Davies committed
1198 1199 1200
#if CONFIG_NEW_TOKENSET
        cost += (*token_costs)[!tok][pt][tok];
#else
Julia Robson's avatar
Julia Robson committed
1201
        cost += (*tok_cost_ptr)[pt][tok];
Thomas Davies's avatar
Thomas Davies committed
1202
#endif
Yaowu Xu's avatar
Yaowu Xu committed
1203
        token_cache[rc] = av1_pt_energy_class[tok];
Julia Robson's avatar
Julia Robson committed
1204 1205 1206 1207
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
Thomas Davies's avatar
Thomas Davies committed
1208
#if !CONFIG_NEW_TOKENSET
Julia Robson's avatar
Julia Robson committed
1209
        tok_cost_ptr = &((*token_costs)[!tok]);
Thomas Davies's avatar
Thomas Davies committed
1210
#endif
Julia Robson's avatar
Julia Robson committed
1211 1212 1213
      }

      // eob token
Thomas Davies's avatar
Thomas Davies committed
1214
      if (band_left || CONFIG_NEW_TOKENSET) {
Jingning Han's avatar
Jingning Han committed
1215 1216 1217 1218 1219 1220 1221 1222
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  return cost;
}
1223 1224
#endif  // !CONFIG_LV_MAP

1225
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
1226 1227 1228 1229
                    int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                    const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                    int use_fast_coef_costing) {
#if !CONFIG_LV_MAP
1230
  const AV1_COMMON *const cm = &cpi->common;
1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253
  return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
                     use_fast_coef_costing);
#else  // !CONFIG_LV_MAP
  (void)scan_order;
  (void)use_fast_coef_costing;
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const BLOCK_SIZE bsize = mbmi->sb_type;
#if CONFIG_CB4X4
#if CONFIG_CHROMA_2X2
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else
  const BLOCK_SIZE plane_bsize =
      AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#endif  // CONFIG_CHROMA_2X2
#else   // CONFIG_CB4X4
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
#endif  // CONFIG_CB4X4

  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
1254
  return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx);
1255 1256
#endif  // !CONFIG_LV_MAP
}
Fergus Simpson's avatar
Fergus Simpson committed
1257
#endif  // !CONFIG_PVQ || CONFIG_VAR_TX
Jingning Han's avatar
Jingning Han committed
1258

Alex Converse's avatar
Alex Converse committed
1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304
// Get transform block visible dimensions cropped to the MI units.
static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
                               BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
                               BLOCK_SIZE tx_bsize, int *width, int *height,
                               int *visible_width, int *visible_height) {
  assert(tx_bsize <= plane_bsize);
  int txb_height = block_size_high[tx_bsize];
  int txb_width = block_size_wide[tx_bsize];
  const int block_height = block_size_high[plane_bsize];
  const int block_width = block_size_wide[plane_bsize];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // TODO(aconverse@google.com): Investigate using crop_width/height here rather
  // than the MI size
  const int block_rows =
      (xd->mb_to_bottom_edge >= 0)
          ? block_height
          : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
  const int block_cols =
      (xd->mb_to_right_edge >= 0)
          ? block_width
          : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
  const int tx_unit_size = tx_size_wide_log2[0];
  if (width) *width = txb_width;
  if (height) *height = txb_height;
  *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
  *visible_height =
      clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
}

// Compute the pixel domain sum square error on all visible 4x4s in the
// transform block.
static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
                          int plane, const uint8_t *src, const int src_stride,
                          const uint8_t *dst, const int dst_stride, int blk_row,
                          int blk_col, const BLOCK_SIZE plane_bsize,
                          const BLOCK_SIZE tx_bsize) {
  int txb_rows, txb_cols, visible_rows, visible_cols;
  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
                     &txb_cols, &txb_rows, &visible_cols, &visible_rows);
  assert(visible_rows > 0);
  assert(visible_cols > 0);
  if (txb_rows == visible_rows && txb_cols == visible_cols) {
    unsigned sse;
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
    return sse;
  }
1305
#if CONFIG_HIGHBITDEPTH
Alex Converse's avatar
Alex Converse committed
1306 1307 1308 1309 1310
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint64_t sse = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
                                           visible_cols, visible_rows);
    return (unsigned int)ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
  }
1311
#endif  // CONFIG_HIGHBITDEPTH
Alex Converse's avatar
Alex Converse committed
1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328
  unsigned sse = aom_sse_odd_size(src, src_stride, dst, dst_stride,
                                  visible_cols, visible_rows);
  return sse;
}

// Compute the squares sum squares on all visible 4x4s in the transform block.
static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane,
                                   const int16_t *diff, const int diff_stride,
                                   int blk_row, int blk_col,
                                   const BLOCK_SIZE plane_bsize,
                                   const BLOCK_SIZE tx_bsize) {
  int visible_rows, visible_cols;
  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
                     NULL, &visible_cols, &visible_rows);
  return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
}

Angie Chiang's avatar
Angie Chiang committed
1329 1330 1331 1332
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
                    BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
                    TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
                    OUTPUT_STATUS output_status) {
1333
  MACROBLOCKD *const xd = &x->e_mbd;
Alex Converse's avatar
Alex Converse committed
1334
  const struct macroblock_plane *const p = &x->plane[plane];
Yushin Cho's avatar
Yushin Cho committed
1335 1336 1337 1338 1339
#if CONFIG_DAALA_DIST
  int qm = OD_HVS_QM;
  int use_activity_masking = 0;
#if CONFIG_PVQ
  use_activity_masking = x->daala_enc.use_activity_masking;
1340
#endif  // CONFIG_PVQ
1341 1342 1343
  struct macroblockd_plane *const pd = &xd->plane[plane];
#else   // CONFIG_DAALA_DIST
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1344
#endif  // CONFIG_DAALA_DIST
Yushin Cho's avatar
Yushin Cho committed
1345 1346

  if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {