rdopt.c 462 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13 14
 */

#include <assert.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
16
#include "./av1_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_mem/aom_mem.h"
21 22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

24 25 26 27 28 29 30 31 32 33 34 35
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
36 37 38
#if CONFIG_LV_MAP
#include "av1/common/txb_common.h"
#endif
Yue Chen's avatar
Yue Chen committed
39 40 41
#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
#endif  // CONFIG_WARPED_MOTION
Jingning Han's avatar
Jingning Han committed
42

Jingning Han's avatar
Jingning Han committed
43
#include "av1/encoder/aq_variance.h"
44
#include "av1/encoder/av1_quantize.h"
45 46 47 48
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
49 50 51
#if CONFIG_LV_MAP
#include "av1/encoder/encodetxb.h"
#endif
52 53
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
54
#if CONFIG_PALETTE
55
#include "av1/encoder/palette.h"
56
#endif  // CONFIG_PALETTE
57 58 59
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
60
#include "av1/encoder/tokenize.h"
61 62
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
63
#endif  // CONFIG_PVQ
Yushin Cho's avatar
Yushin Cho committed
64 65
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
66
#endif  // CONFIG_PVQ || CONFIG_DAALA_DIST
67
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
68 69
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
// Every ordered pair (a, b) with a and b in 0..3, listed with a as the slow
// index and b as the fast index.
// NOTE(review): presumably each pair selects the two interpolation filters of
// a dual-filter combination -- confirm at the use site.
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
  { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 },
  { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
};
Angie Chiang's avatar
Angie Chiang committed
74
#endif  // CONFIG_DUAL_FILTER
75

76 77
#if CONFIG_EXT_REFS

78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
// Per-reference bit masks (bit index == reference frame index). Each
// <REF>_FRAME_MODE_MASK has a bit set for every reference frame, including
// INTRA_FRAME, EXCEPT the one named in the macro.
// Variant with the extended reference set (LAST2/LAST3/BWDREF available).
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

// Variant with the basic reference set (LAST, GOLDEN, ALTREF only).
#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
Jingning Han's avatar
Jingning Han committed
105

106 107
#endif  // CONFIG_EXT_REFS

108
// Bit mask over reference frame indices: ALTREF_FRAME (plus BWDREF_FRAME when
// CONFIG_EXT_REFS is enabled) together with bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME -- confirm against
// the MV_REFERENCE_FRAME definition.
#if CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK \
  ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#else
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
113

114 115
// Search tuning constants.
// NOTE(review): their use sites are outside this part of the file; confirm
// the exact meaning there before changing either value.
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
116

117 118 119 120 121
// Constants that only exist when CONFIG_EXT_INTRA is enabled.
// NOTE(review): use sites are not visible in this part of the file.
#if CONFIG_EXT_INTRA
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

122 123
// Coefficients of the two linear decision functions evaluated in
// adst_vs_flipadst(). Entries 0-2 multiply the three vertical energy fractions
// and entry 3 is the additive constant; entries 4-6 and 7 play the same roles
// for the horizontal direction.
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
124

Jingning Han's avatar
Jingning Han committed
125 126 127 128 129
// One entry of the mode table av1_mode_order: a prediction mode together with
// a pair of reference frames (the second is NONE_FRAME for single-reference
// entries).
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

130
// One entry of the reference table av1_ref_order: a pair of reference frames
// with no associated prediction mode.
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
131 132

struct rdcost_block_args {
Yaowu Xu's avatar
Yaowu Xu committed
133
  const AV1_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
134
  MACROBLOCK *x;
135 136
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
137
  RD_STATS rd_stats;
Jingning Han's avatar
Jingning Han committed
138 139 140 141 142 143 144
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
};

#define LAST_NEW_MV_INDEX 6
Yaowu Xu's avatar
Yaowu Xu committed
145
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
Emil Keyder's avatar
Emil Keyder committed
146
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
147
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
148 149 150
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
151
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
152 153
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
154

Emil Keyder's avatar
Emil Keyder committed
155
  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
156

Emil Keyder's avatar
Emil Keyder committed
157
  { NEWMV, { LAST_FRAME, NONE_FRAME } },
158
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
159 160 161
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
162
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
163 164
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
165

Emil Keyder's avatar
Emil Keyder committed
166
  { NEARMV, { LAST_FRAME, NONE_FRAME } },
167
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
168 169 170
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
171
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
172 173
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
174

Emil Keyder's avatar
Emil Keyder committed
175
  { ZEROMV, { LAST_FRAME, NONE_FRAME } },
176
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
177 178 179
  { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
  { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
  { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
180
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
181 182
  { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
  { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
183

184
// TODO(zoeliu): May need to reconsider the order on the modes to check
185

186
#if CONFIG_EXT_INTER
187
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
188
#if CONFIG_EXT_REFS
189 190
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
191
#endif  // CONFIG_EXT_REFS
192
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
193
#if CONFIG_EXT_REFS
194 195 196 197
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
198
#endif  // CONFIG_EXT_REFS
199

200
#else  // CONFIG_EXT_INTER
201

202
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
203
#if CONFIG_EXT_REFS
204 205
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
206
#endif  // CONFIG_EXT_REFS
207
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
208
#if CONFIG_EXT_REFS
209 210 211 212
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
213
#endif  // CONFIG_EXT_REFS
214
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
215

Emil Keyder's avatar
Emil Keyder committed
216
  { TM_PRED, { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
217

218
#if CONFIG_ALT_INTRA
Emil Keyder's avatar
Emil Keyder committed
219
  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
220 221
#endif  // CONFIG_ALT_INTRA

222
#if CONFIG_EXT_INTER
223 224 225 226 227 228 229 230 231
  { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
232

233
#if CONFIG_EXT_REFS
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
  { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
253
#endif  // CONFIG_EXT_REFS
254

255 256 257 258 259 260 261 262 263
  { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
264 265

#if CONFIG_EXT_REFS
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
  { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
305
#endif  // CONFIG_EXT_REFS
306 307 308

#else  // CONFIG_EXT_INTER

309 310
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
311
#if CONFIG_EXT_REFS
312 313 314 315
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
316
#endif  // CONFIG_EXT_REFS
317 318
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
319 320

#if CONFIG_EXT_REFS
321 322 323 324 325 326 327 328
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
329
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
330

331
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
332
#if CONFIG_EXT_REFS
333 334
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
335
#endif  // CONFIG_EXT_REFS
336
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
337 338

#if CONFIG_EXT_REFS
339 340 341 342
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
343
#endif  // CONFIG_EXT_REFS
344

345
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
346

Emil Keyder's avatar
Emil Keyder committed
347 348 349 350 351 352 353 354
  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
355 356

#if CONFIG_EXT_INTER
357 358 359 360
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
361 362

#if CONFIG_EXT_REFS
363 364 365 366 367 368 369 370 371
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
372 373
#endif  // CONFIG_EXT_REFS

374 375 376 377
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
378

379
#if CONFIG_EXT_REFS
380 381 382 383
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
384 385
#endif  // CONFIG_EXT_REFS

386 387 388 389
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
390
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
391 392
};

Yaowu Xu's avatar
Yaowu Xu committed
393
static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
Emil Keyder's avatar
Emil Keyder committed
394
  { { LAST_FRAME, NONE_FRAME } },
395
#if CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
396 397
  { { LAST2_FRAME, NONE_FRAME } },    { { LAST3_FRAME, NONE_FRAME } },
  { { BWDREF_FRAME, NONE_FRAME } },
398
#endif  // CONFIG_EXT_REFS
Emil Keyder's avatar
Emil Keyder committed
399
  { { GOLDEN_FRAME, NONE_FRAME } },   { { ALTREF_FRAME, NONE_FRAME } },
400

401
  { { LAST_FRAME, ALTREF_FRAME } },
402
#if CONFIG_EXT_REFS
403
  { { LAST2_FRAME, ALTREF_FRAME } },  { { LAST3_FRAME, ALTREF_FRAME } },
404
#endif  // CONFIG_EXT_REFS
405
  { { GOLDEN_FRAME, ALTREF_FRAME } },
406 407

#if CONFIG_EXT_REFS
408 409
  { { LAST_FRAME, BWDREF_FRAME } },   { { LAST2_FRAME, BWDREF_FRAME } },
  { { LAST3_FRAME, BWDREF_FRAME } },  { { GOLDEN_FRAME, BWDREF_FRAME } },
410
#endif  // CONFIG_EXT_REFS
411

Emil Keyder's avatar
Emil Keyder committed
412
  { { INTRA_FRAME, NONE_FRAME } },
Jingning Han's avatar
Jingning Han committed
413 414
};

415
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
416
// Returns the cost of signalling value `v` from an alphabet described by `n`.
// With l = get_unsigned_bits(n) and m = (1 << l) - n, values below m are
// charged l - 1 bits and all other values l bits, each bit priced at
// av1_cost_bit(128, 0). Returns 0 when l is 0.
static INLINE int write_uniform_cost(int n, int v) {
  const int num_bits = get_unsigned_bits(n);
  if (num_bits == 0) return 0;

  // Values strictly below this limit use the shorter code length.
  const int short_code_limit = (1 << num_bits) - n;
  const int bits_charged = (v < short_code_limit) ? num_bits - 1 : num_bits;
  return bits_charged * av1_cost_bit(128, 0);
}
425
#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
426

427 428 429
// Constants for the prune 1 and prune 2 decision boundaries. Each boundary is
// given as a midpoint and a margin; adst_vs_flipadst() compares its
// projections against FAST_EXT_TX_EDST_MID +/- FAST_EXT_TX_EDST_MARGIN.
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
434
  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
435
#if CONFIG_EXT_TX
436 437
  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
438 439 440 441
#endif  // CONFIG_EXT_TX
};

static const TX_TYPE_1D htx_tab[TX_TYPES] = {
442
  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
443
#if CONFIG_EXT_TX
444 445
  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
446 447
#endif  // CONFIG_EXT_TX
};
448

Yushin Cho's avatar
Yushin Cho committed
449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469
#if CONFIG_DAALA_DIST
/* Returns the variance of the 4x4 block whose top-left sample is x[0] and
   whose rows are `stride` elements apart, computed as
   (sum(t^2) - (sum(t)^2 >> 4)) >> 4 over the 16 samples.

   The sums are accumulated in 64 bits: with plain int, sum * sum already
   exceeds INT_MAX for 12-bit samples (16 * 4095 squared), which is undefined
   behaviour. Results for 8-bit input are unchanged. */
static int od_compute_var_4x4(od_coeff *x, int stride) {
  int64_t sum = 0;
  int64_t sum_sq = 0;
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      const int64_t t = x[i * stride + j];
      sum += t;
      sum_sq += t * t;
    }
  }
  // TODO(yushin) : Check whether any further changes are required for high
  // bit depth.
  return (int)((sum_sq - (sum * sum >> 4)) >> 4);
}

470 471 472 473 474 475 476
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
/* Sum of the three filter taps (1 + X + 1); used to normalize the filter. */
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)

Yushin Cho's avatar
Yushin Cho committed
477
/* Distortion of one 8x8 block.
   x, y  : the two 8x8 blocks being compared (rows `stride` apart).
   e_lp  : low-pass filtered difference for the same block, same stride.
   qm    : only checked by OD_ASSERT (must not be OD_FLAT_QM).
   Returns activity^2 * (normalized sum of e_lp^2 + variance distortion). */
static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
                                  od_coeff *y, od_coeff *e_lp, int stride) {
  double activity;
  double vardist = 0;

  OD_ASSERT(qm != OD_FLAT_QM);
  (void)qm;
#if 1
  int min_var = INT_MAX;
  double inv_var_sum = 0;
  /* Visit the 3x3 grid of 4x4 windows placed at a step of 2 samples. */
  for (int row = 0; row < 3; row++) {
    for (int col = 0; col < 3; col++) {
      const int offset = 2 * row * stride + 2 * col;
      const int varx = od_compute_var_4x4(x + offset, stride);
      const int vary = od_compute_var_4x4(y + offset, stride);
      min_var = OD_MINI(min_var, varx);
      inv_var_sum += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* The variance statistic depends on whether activity masking is used:
     9 / sum(1 / (1 + var)) over the nine windows when it is on, the minimum
     window variance when it is off (the harmonic-mean form appeared slightly
     worse with masking off). The calibration constant just ensures that the
     rate is preserved compared to activity=1. */
  double calibration;
  double var_stat;
  if (use_activity_masking) {
    calibration = 1.95;
    var_stat = 9. / inv_var_sum;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 0.25 is a noise floor and 1/6 is the activity masking exponent. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
  activity = 1;
#endif  // 1
  double sum = 0;
  for (int row = 0; row < 8; row++) {
    for (int col = 0; col < 8; col++) {
      const double v = e_lp[row * stride + col];
      sum += v * v;
    }
  }
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
  return activity * activity * (sum + vardist);
}

// Note : Inputs x and y are in a pixel domain
// Distortion between two bsize_w x bsize_h blocks stored contiguously
// (row pitch == bsize_w).
// Note : Inputs x and y are in a pixel domain
//
// With qm == OD_FLAT_QM the result is the plain sum of squared differences.
// Otherwise the difference is low-pass filtered with [1 mid 1] horizontally
// and then vertically (edge samples use [mid 2]), od_compute_dist_8x8() is
// summed over every 8x8 sub-block, and the total is scaled by a
// qindex-dependent factor.
//
// Both dimensions must be at least 8. In the non-flat path the scratch
// buffers hold MAX_TX_SQUARE samples, so bsize_w * bsize_h must not exceed
// that; this is now asserted.
static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
                              od_coeff *y, int bsize_w, int bsize_h,
                              int qindex) {
  int i;
  double sum;
  sum = 0;

  assert(bsize_w >= 8 && bsize_h >= 8);

  if (qm == OD_FLAT_QM) {
    for (i = 0; i < bsize_w * bsize_h; i++) {
      double tmp;
      tmp = x[i] - y[i];
      sum += tmp * tmp;
    }
  } else {
    int j;
    DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
    DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
    int mid = OD_DIST_LP_MID;

    // e, tmp and e_lp are indexed up to bsize_w * bsize_h - 1.
    assert(bsize_w * bsize_h <= MAX_TX_SQUARE);

    // Difference signal.
    for (i = 0; i < bsize_h; i++) {
      for (j = 0; j < bsize_w; j++) {
        e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
      }
    }
    // Horizontal pass; the first and last column double the single
    // available neighbour.
    for (i = 0; i < bsize_h; i++) {
      tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
      tmp[i * bsize_w + bsize_w - 1] =
          mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
      for (j = 1; j < bsize_w - 1; j++) {
        tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
                               e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
      }
    }
    // Vertical pass: first and last row, then the interior rows.
    for (j = 0; j < bsize_w; j++) {
      e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
      e_lp[(bsize_h - 1) * bsize_w + j] =
          mid * tmp[(bsize_h - 1) * bsize_w + j] +
          2 * tmp[(bsize_h - 2) * bsize_w + j];
    }
    for (i = 1; i < bsize_h - 1; i++) {
      for (j = 0; j < bsize_w; j++) {
        e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                                tmp[(i - 1) * bsize_w + j] +
                                tmp[(i + 1) * bsize_w + j];
      }
    }
    // Accumulate the per-8x8 distortion.
    for (i = 0; i < bsize_h; i += 8) {
      for (j = 0; j < bsize_w; j += 8) {
        sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
                                   &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                   bsize_w);
      }
    }
    /* Scale according to linear regression against SSE, for 8x8 blocks. */
    if (activity_masking) {
      sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
             (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
    } else {
      sum *= qindex >= 128
                 ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
                 : qindex <= 43
                       ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                       : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
    }
  }
  return sum;
}

static int64_t av1_daala_dist(const uint8_t *src, int src_stride,
607 608 609
                              const uint8_t *dst, int dst_stride, int bsw,
                              int bsh, int qm, int use_activity_masking,
                              int qindex) {
Yushin Cho's avatar
Yushin Cho committed
610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626
  int i, j;
  int64_t d;
  DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);

  assert(qm == OD_HVS_QM);

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];

  for (j = 0; j < bsh; j++)
    for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];

  d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
                               qindex);
  return d;
}
627
#endif  // CONFIG_DAALA_DIST
Yushin Cho's avatar
Yushin Cho committed
628

Yaowu Xu's avatar
Yaowu Xu committed
629
// Splits the squared error between `src` and `dst` over a 4x4 grid of cells
// covering the block and reports how it is distributed.
//   hordist[k], k = 0..2 : fraction of the total error in grid column k.
//   verdist[k], k = 0..2 : fraction of the total error in grid row k.
// Only the first three fractions are written in each direction. When the
// total error is zero every written entry is set to 0.25.
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *dst, int dst_stride,
                                         double *hordist, double *verdist) {
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  unsigned int esq[16] = { 0 };  // squared error per cell, row-major 4x4

  const int f_index = bsize - BLOCK_16X16;
  if (f_index < 0) {
    // Blocks ordered before BLOCK_16X16: accumulate sample by sample.
    const int w_shift = bw == 8 ? 1 : 2;
    const int h_shift = bh == 8 ? 1 : 2;
#if CONFIG_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int r = 0; r < bh; ++r) {
        for (int c = 0; c < bw; ++c) {
          const int cell = (c >> w_shift) + ((r >> h_shift) << 2);
          const int diff =
              src16[c + r * src_stride] - dst16[c + r * dst_stride];
          esq[cell] += diff * diff;
        }
      }
    } else {
#endif  // CONFIG_HIGHBITDEPTH

      for (int r = 0; r < bh; ++r) {
        for (int c = 0; c < bw; ++c) {
          const int cell = (c >> w_shift) + ((r >> h_shift) << 2);
          const int diff = src[c + r * src_stride] - dst[c + r * dst_stride];
          esq[cell] += diff * diff;
        }
      }
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH
  } else {
    // Otherwise call the variance function selected by f_index once per
    // cell; it stores its squared-error output in esq[] and the return value
    // is ignored.
    for (int row = 0; row < 4; ++row) {
      const uint8_t *const src_row = src + row * (bh / 4 * src_stride);
      const uint8_t *const dst_row = dst + row * (bh / 4 * dst_stride);
      for (int col = 0; col < 4; ++col) {
        const int x_off = col * bw / 4;
        cpi->fn_ptr[f_index].vf(src_row + x_off, src_stride, dst_row + x_off,
                                dst_stride, &esq[4 * row + col]);
      }
    }
  }

  double total = 0.0;
  for (int k = 0; k < 16; ++k) total += esq[k];

  if (total > 0) {
    const double e_recip = 1.0 / total;
    for (int k = 0; k < 3; ++k) {
      hordist[k] =
          ((double)esq[k] + esq[k + 4] + esq[k + 8] + esq[k + 12]) * e_recip;
    }
    for (int k = 0; k < 3; ++k) {
      verdist[k] = ((double)esq[4 * k] + esq[4 * k + 1] + esq[4 * k + 2] +
                    esq[4 * k + 3]) *
                   e_recip;
    }
  } else {
    for (int k = 0; k < 3; ++k) hordist[k] = verdist[k] = 0.25;
  }
}

Urvang Joshi's avatar
Urvang Joshi committed
722 723 724
// Decides, per direction, whether ADST or FLIPADST can be pruned from the
// transform search for this block.
//
// The normalized energy distributions produced by
// get_energy_distribution_fine() are projected onto the ADST_FLIP_SVM
// coefficients (entries 0..3 for the vertical projection, 4..7 for the
// horizontal one; entries 3 and 7 are the constant terms). A projection that
// falls outside FAST_EXT_TX_EDST_MID +/- FAST_EXT_TX_EDST_MARGIN prunes one
// of the two 1-D transforms; inside the margin nothing is pruned.
//
// Returns a bitmask: bit (1 << T) prunes 1-D type T for the vertical
// projection, bit (1 << (T + 8)) prunes it for the horizontal projection.
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                            const uint8_t *src, int src_stride,
                            const uint8_t *dst, int dst_stride) {
  int prune_bitmask = 0;
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);

  const double svm_proj_v =
      vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
      vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
  const double svm_proj_h =
      hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
      hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];

  // Vertical decision occupies the low byte of the mask.
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  // Horizontal decision is stored 8 bits higher.
  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
Alex Converse's avatar
Alex Converse committed
749 750
static void get_horver_correlation(const int16_t *diff, int stride, int w,
                                   int h, double *hcorr, double *vcorr) {
751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
778 779 780
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
781 782 783 784 785 786 787 788 789 790 791 792
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

Alex Converse's avatar
Alex Converse committed
793 794
int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
  double hcorr, vcorr;
795
  int prune_bitmask = 0;
Alex Converse's avatar
Alex Converse committed
796
  get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
797

Alex Converse's avatar
Alex Converse committed
798
  if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
799
    prune_bitmask |= 1 << IDTX_1D;
Alex Converse's avatar
Alex Converse committed
800
  else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
801 802
    prune_bitmask |= 1 << DCT_1D;

Alex Converse's avatar
Alex Converse committed
803
  if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
804
    prune_bitmask |= 1 << (IDTX_1D + 8);
Alex Converse's avatar
Alex Converse committed
805
  else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
806 807 808 809 810
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xu's avatar
Yaowu Xu committed
811
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse's avatar
Alex Converse committed
812 813
                             MACROBLOCK *x, const MACROBLOCKD *xd,
                             int adst_flipadst, int dct_idtx) {
814
  int prune = 0;
815

Alex Converse's avatar
Alex Converse committed
816 817 818
  if (adst_flipadst) {
    const struct macroblock_plane *const p = &x->plane[0];
    const struct macroblockd_plane *const pd = &xd->plane[0];
819
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
Alex Converse's avatar
Alex Converse committed
820 821 822 823 824 825 826 827 828
                              pd->dst.buf, pd->dst.stride);
  }
  if (dct_idtx) {
    av1_subtract_plane(x, bsize, 0);
    const struct macroblock_plane *const p = &x->plane[0];
    const int bw = 4 << (b_width_log2_lookup[bsize]);
    const int bh = 4 << (b_height_log2_lookup[bsize]);
    prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
  }
829 830 831

  return prune;
}
832 833
#endif  // CONFIG_EXT_TX

834
// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xu's avatar
Yaowu Xu committed
835
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse's avatar
Alex Converse committed
836 837 838
                             const MACROBLOCK *x, const MACROBLOCKD *xd) {
  const struct macroblock_plane *const p = &x->plane[0];
  const struct macroblockd_plane *const pd = &xd->plane[0];
839
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
Alex Converse's avatar
Alex Converse committed
840
                          pd->dst.stride);
841 842
}

Yaowu Xu's avatar
Yaowu Xu committed
843
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
Urvang Joshi's avatar
Urvang Joshi committed
844
                          const MACROBLOCKD *const xd, int tx_set) {
845 846 847
#if CONFIG_EXT_TX
  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
#else
848
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
849
#endif  // CONFIG_EXT_TX
850

851
  switch (cpi->sf.tx_type_search.prune_mode) {
852 853
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
854
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
855
        return 0;
856 857
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
858 859
#if CONFIG_EXT_TX
    case PRUNE_TWO:
860
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
861
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
862 863
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
864
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
865 866
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
867
      break;
868
#endif  // CONFIG_EXT_TX
869 870 871 872 873
  }
  assert(0);
  return 0;
}

874
// Returns nonzero when tx_type should still be searched given a prune mask.
//
// With CONFIG_EXT_TX, tx_type is rejected if the mask has the bit for its
// vertical 1-D type (index vtx_tab[tx_type]) or for its horizontal 1-D type
// (index htx_tab[tx_type] + 8) set. Without CONFIG_EXT_TX every type is
// searched.
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
#else
  // temporary to avoid compiler warnings
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
  return 1;
#endif  // CONFIG_EXT_TX
}

Yaowu Xu's avatar
Yaowu Xu committed
889
static void model_rd_from_sse(const AV1_COMP *const cpi,
890 891
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
892 893 894
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
895
#if CONFIG_HIGHBITDEPTH
896
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
897
#endif  // CONFIG_HIGHBITDEPTH
898
                                                    3;
Geza Lore's avatar
Geza Lore committed
899 900 901 902 903 904 905

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
906
      *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xu's avatar
Yaowu Xu committed
907
                    (16 - AV1_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
908 909 910 911
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
Yaowu Xu's avatar
Yaowu Xu committed
912 913
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
914 915 916 917 918
  }

  *dist <<= 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
919
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
920 921 922 923
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
924 925 926
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
927 928 929
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
930 931 932 933 934 935
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
936 937 938
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
939
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
940 941
    const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
Jingning Han's avatar
Jingning Han committed
942
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
943
#endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
Jingning Han's avatar
Jingning Han committed
944

Geza Lore's avatar
Geza Lore committed
945 946 947
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
948

949 950
#if CONFIG_CB4X4
    if (x->skip_chroma_rd && plane) continue;
951
#endif  // CONFIG_CB4X4
952

Geza Lore's avatar
Geza Lore committed
953 954
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
955 956
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
957

958
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
959

Geza Lore's avatar
Geza Lore committed
960
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
961

Geza Lore's avatar
Geza Lore committed
962
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
963 964 965

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
966 967
  }

Geza Lore's avatar
Geza Lore committed
968
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
969 970
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
971
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
972 973
}

Yaowu Xu's avatar
Yaowu Xu committed
974 975
// Computes two sums over the first block_size coefficients:
//   return value : sum of (coeff[i] - dqcoeff[i])^2
//   *ssz         : sum of coeff[i]^2
// All arithmetic is carried out in 64 bits so that large coefficient values
// cannot overflow the intermediate `int` products.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
989 990
// Returns the sum of (coeff[i] - dqcoeff[i])^2 over the first block_size
// 16-bit coefficients.
//
// The difference of two int16_t values can reach +/-65535, whose square does
// not fit in a 32-bit int, so the product is formed in 64 bits.
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += (int64_t)diff * diff;
  }

  return error;
}

1002
#if CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
1003 1004 1005
// High-bit-depth variant of the block error computation.
//
// Sums (coeff[i] - dqcoeff[i])^2 and coeff[i]^2 over the first block_size
// coefficients, then scales both sums down by 2 * (bd - 8) bits with
// rounding.
//   return value : scaled squared error between coeff and dqcoeff
//   *ssz         : scaled sum of squared coeff
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    // Widen before subtracting so the difference itself cannot overflow.
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
1023
#endif  // CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
1024

Thomas Daede's avatar
Thomas Daede committed
1025 1026 1027 1028 1029 1030 1031 1032 1033 1034
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() return two kind of errors,
// 1) reconstruction (i.e. decoded) error and
// 2) Squared sum of transformed residue (i.e. 'coeff')
// However, if PVQ is enabled, coeff does not keep the transformed residue
// but instead a transformed original is kept.
// Hence, new parameter ref vector (i.e. transformed predicted signal)
// is required to derive the residue signal,
// i.e. coeff - ref = residue (all transformed).

1035
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
// PVQ, high-bit-depth block error.
//
// Returns the squared error between coeff and dqcoeff, and stores in *ssz the
// squared error between coeff and ref (the prediction residue energy). Both
// values are scaled down by 2 * (bd - 8) bits with rounding.
static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
                                         const tran_low_t *dqcoeff,
                                         const tran_low_t *ref,
                                         intptr_t block_size, int64_t *ssz,
                                         int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = (shift > 0) ? (1 << (shift - 1)) : 0;
  // For high bit depth the ssz output of av1_block_error() is discarded
  // until a 32-bit version of av1_block_error_fp is written.
  int64_t unused_ssz;

  // Distortion of the decoded signal: (orig - decoded)^2, computed with the
  // existing sse code.
  const int64_t recon_err =
      av1_block_error(coeff, dqcoeff, block_size, &unused_ssz);
  // Prediction residue energy: (orig - ref)^2.
  const int64_t residue_sq =
      av1_block_error(coeff, ref, block_size, &unused_ssz);

  *ssz = (residue_sq + rounding) >> shift;
  return (recon_err + rounding) >> shift;
}
#else
// PVQ block error for the non-high-bit-depth build.
//
// Returns the squared error between coeff and dqcoeff, and stores in *ssz the
// squared error between coeff and ref (the prediction residue energy).
//
// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
static int64_t av1_block_error2_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  const tran_low_t *ref, intptr_t block_size,
                                  int64_t *ssz) {
  // Distortion of the decoded signal, (orig - decoded)^2, via the existing
  // sse code.
  const int64_t recon_err = av1_block_error_fp(coeff, dqcoeff, block_size);
  // Prediction residue^2 = (orig - ref)^2.
  *ssz = av1_block_error_fp(coeff, ref, block_size);
  return recon_err;
}
1073
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1074 1075
#endif  // CONFIG_PVQ

1076
#if !CONFIG_PVQ || CONFIG_VAR_TX
1077
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * was non-zero). */
1082 1083 1084 1085 1086
#if !CONFIG_LV_MAP
// Returns the token cost of the quantized coefficients of one transform
// block, accumulated from x->token_costs and av1_get_token_cost().
//
// plane, block : select the coefficient buffer (p->qcoeff) and eob.
// tx_size      : transform size; selects the band counts and cost tables.
// scan_order   : scan and neighbour tables used to visit the coefficients.
// a, l         : above/left entropy contexts, combined into the initial
//                coefficient context.
// use_fast_coef_costing : when nonzero, the context of each AC token is
//                approximated from whether the previous token was zero
//                instead of being derived with get_coef_context().
//
// `token_costs` walks through the per-band cost tables: it is advanced each
// time the number of coefficients left in the current band (band_left)
// reaches zero.
static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                       int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                       const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const uint16_t *band_count = &band_count_table[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  const int tx_size_ctx = txsize_sqr_map[tx_size];
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
  uint8_t token_cache[MAX_TX_SQUARE];
  int pt = combine_entropy_contexts(*a, *l);
  int c, cost;
  const int16_t *scan = scan_order->scan;
  const int16_t *nb = scan_order->neighbors;
#if CONFIG_NEW_TOKENSET
  const int ref = is_inter_block(mbmi);
  aom_prob *blockz_probs =
      cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];

#endif  // CONFIG_NEW_TOKENSET

#if CONFIG_HIGHBITDEPTH
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
#else
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
#endif  // CONFIG_HIGHBITDEPTH

#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
  // Check for consistency of tx_size with mode info
  assert(tx_size == get_tx_size(plane, xd));
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
  (void)cm;

  if (eob == 0) {
#if CONFIG_NEW_TOKENSET
    // single eob token
    cost = av1_cost_bit(blockz_probs[pt], 0);
#else
    cost = token_costs[0][0][pt][EOB_TOKEN];
#endif  // CONFIG_NEW_TOKENSET
  } else {
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = av1_get_token_cost(v, &prev_t, cat6_bits);
#if CONFIG_NEW_TOKENSET
      cost += (*token_costs)[!prev_t][pt][prev_t];
#else
      cost += (*token_costs)[0][pt][prev_t];
#endif

      token_cache[0] = av1_pt_energy_class[prev_t];
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += av1_get_token_cost(v, &t, cat6_bits);
        // Fast path: the context is just "was the previous token zero?".
#if CONFIG_NEW_TOKENSET
        cost += (*token_costs)[!t][!prev_t][t];
#else
        cost += (*token_costs)[!prev_t][!prev_t][t];
#endif
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
      }

      // eob token
      if (band_left || CONFIG_NEW_TOKENSET)
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
#if !CONFIG_NEW_TOKENSET
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
#endif
      cost = av1_get_token_cost(v, &tok, cat6_bits);
#if CONFIG_NEW_TOKENSET
      cost += (*token_costs)[!tok][pt][tok];
#else
      cost += (*token_costs)[0][pt][tok];
#endif

      token_cache[0] = av1_pt_energy_class[tok];
      ++token_costs;

#if !CONFIG_NEW_TOKENSET
      tok_cost_ptr = &((*token_costs)[!tok]);
#endif

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += av1_get_token_cost(v, &tok, cat6_bits);
        // Exact path: context comes from the already-costed neighbours.
        pt = get_coef_context(nb, token_cache, c);
#if CONFIG_NEW_TOKENSET
        cost += (*token_costs)[!tok][pt][tok];
#else
        cost += (*tok_cost_ptr)[pt][tok];
#endif
        token_cache[rc] = av1_pt_energy_class[tok];
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
#if !CONFIG_NEW_TOKENSET
        tok_cost_ptr = &((*token_costs)[!tok]);
#endif
      }

      // eob token
      if (band_left || CONFIG_NEW_TOKENSET) {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  return cost;
}
1224 1225
#endif  // !CONFIG_LV_MAP

1226
// Returns the coefficient cost of one transform block.
//
// Without CONFIG_LV_MAP this forwards to cost_coeffs() with the common
// state taken from cpi. With CONFIG_LV_MAP, scan_order and
// use_fast_coef_costing are ignored: a TXB context is derived from the plane
// block size and the above/left contexts and the cost comes from
// av1_cost_coeffs_txb().
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
                    int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                    const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                    int use_fast_coef_costing) {
#if !CONFIG_LV_MAP
  const AV1_COMMON *const cm = &cpi->common;
  return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
                     use_fast_coef_costing);
#else  // !CONFIG_LV_MAP
  (void)scan_order;
  (void)use_fast_coef_costing;
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const BLOCK_SIZE bsize = mbmi->sb_type;
#if CONFIG_CB4X4
#if CONFIG_CHROMA_2X2
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else
  const BLOCK_SIZE plane_bsize =
      AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#endif  // CONFIG_CHROMA_2X2
#else   // CONFIG_CB4X4
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
#endif  // CONFIG_CB4X4

  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
  return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx);
#endif  // !CONFIG_LV_MAP
}
Fergus Simpson's avatar
Fergus Simpson committed
1258
#endif  // !CONFIG_PVQ || CONFIG_VAR_TX
Jingning Han's avatar
Jingning Han committed
1259

Alex Converse's avatar
Alex Converse committed
1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305
// Get transform block visible dimensions cropped to the MI units.
//
// width / height (optional, may be NULL) receive the full transform block
// size; visible_width / visible_height receive the part of the transform
// block at (blk_row, blk_col) that lies inside the frame, clamped to
// [0, full size].
static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
                               BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
                               BLOCK_SIZE tx_bsize, int *width, int *height,
                               int *visible_width, int *visible_height) {
  assert(tx_bsize <= plane_bsize);
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int tx_w = block_size_wide[tx_bsize];
  const int tx_h = block_size_high[tx_bsize];

  // Start from the full plane block and shrink it by however far the block
  // extends past the bottom/right frame edge (edge distances are negative in
  // that case and are converted to plane pixels by the shift).
  // TODO(aconverse@google.com): Investigate using crop_width/height here rather
  // than the MI size
  int rows_in_frame = block_size_high[plane_bsize];
  int cols_in_frame = block_size_wide[plane_bsize];
  if (xd->mb_to_bottom_edge < 0)
    rows_in_frame += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
  if (xd->mb_to_right_edge < 0)
    cols_in_frame += xd->mb_to_right_edge >> (3 + pd->subsampling_x);

  // blk_row / blk_col are converted to pixels by this shift.
  const int unit_log2 = tx_size_wide_log2[0];
  const int col_px = blk_col << unit_log2;
  const int row_px = blk_row << unit_log2;

  if (width) *width = tx_w;
  if (height) *height = tx_h;
  *visible_width = clamp(cols_in_frame - col_px, 0, tx_w);
  *visible_height = clamp(rows_in_frame - row_px, 0, tx_h);
}

// Compute the pixel domain sum square error on all visible 4x4s in the
// transform block.
//
// When the whole transform block is visible, the SSE comes from the variance
// function for tx_bsize. Otherwise only the visible_cols x visible_rows
// region is measured with the odd-size SSE helpers; for high-bit-depth
// buffers that result is rounded down by (xd->bd - 8) * 2 bits.
static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
                          int plane, const uint8_t *src, const int src_stride,
                          const uint8_t *dst, const int dst_stride, int blk_row,
                          int blk_col, const BLOCK_SIZE plane_bsize,
                          const BLOCK_SIZE tx_bsize) {
  int txb_rows, txb_cols, visible_rows, visible_cols;
  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
                     &txb_cols, &txb_rows, &visible_cols, &visible_rows);
  assert(visible_rows > 0);
  assert(visible_cols > 0);
  if (txb_rows == visible_rows && txb_cols == visible_cols) {
    unsigned sse;
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
    return sse;
  }
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint64_t sse = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
                                           visible_cols, visible_rows);
    return (unsigned int)ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
  }
#endif  // CONFIG_HIGHBITDEPTH
  unsigned sse = aom_sse_odd_size(src, src_stride, dst, dst_stride,
                                  visible_cols, visible_rows);
  return sse;
}

// Compute the sum of squares of `diff` over all visible 4x4s in the
// transform block, i.e. over the visible width x height reported by
// get_txb_dimensions().
static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane,
                                   const int16_t *diff, const int diff_stride,
                                   int blk_row, int blk_col,
                                   const BLOCK_SIZE plane_bsize,
                                   const BLOCK_SIZE tx_bsize) {
  int vis_h, vis_w;
  // The full transform block size is not needed here, hence the NULLs.
  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
                     NULL, &vis_w, &vis_h);
  const int64_t sum_sq = aom_sum_squares_2d_i16(diff, diff_stride, vis_w, vis_h);
  return sum_sq;
}

Angie Chiang's avatar
Angie Chiang committed
1330 1331 1332 1333
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
                    BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
                    TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
                    OUTPUT_STATUS output_status) {
1334
  MACROBLOCKD *const xd = &x->e_mbd;
Alex Converse's avatar
Alex Converse committed
1335
  const struct macroblock_plane *const p = &x->plane[plane];
Yushin Cho's avatar
Yushin Cho committed
1336 1337 1338 1339 1340
#if CONFIG_DAALA_DIST
  int qm = OD_HVS_QM;
  int use_activity_masking = 0;
#if CONFIG_PVQ
  use_activity_masking = x->daala_enc.use_activity_masking;
1341
#endif  // CONFIG_PVQ
1342 1343 1344
  struct macroblockd_plane *const pd = &xd->plane[plane];
#else   // CONFIG_DAALA_DIST
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1345
#endif  // CONFIG_DAALA_DIST
Yushin Cho's avatar
Yushin Cho committed
1346 1347

  if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {