rdopt.c 429 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13 14
 */

#include <assert.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
16
#include "./av1_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_mem/aom_mem.h"
21 22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

24 25 26 27 28 29 30 31 32 33 34 35
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
Yue Chen's avatar
Yue Chen committed
36 37 38
#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
#endif  // CONFIG_WARPED_MOTION
Jingning Han's avatar
Jingning Han committed
39

Jingning Han's avatar
Jingning Han committed
40
#include "av1/encoder/aq_variance.h"
41 42 43 44 45 46
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
47
#if CONFIG_PALETTE
48
#include "av1/encoder/palette.h"
49
#endif  // CONFIG_PALETTE
50 51 52 53
#include "av1/encoder/quantize.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
54
#include "av1/encoder/tokenize.h"
55 56 57
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
#endif
58
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
59 60
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
// All ordered pairs (a, b) with a, b in [0, 3], in lexicographic order.
// NOTE(review): the 16 rows assume SWITCHABLE_FILTERS == 4, and each pair is
// presumably one filter index per direction -- confirm against the users of
// this table.
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
  { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 },
  { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
};
Angie Chiang's avatar
Angie Chiang committed
65
#endif  // CONFIG_DUAL_FILTER
66

67 68
#if CONFIG_EXT_REFS

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
// <X>_FRAME_MODE_MASK has bit (1 << frame) set for every reference frame type
// other than <X> itself, INTRA_FRAME included. With CONFIG_EXT_REFS the set of
// frames is INTRA, LAST, LAST2, LAST3, GOLDEN, BWDREF and ALTREF.
// NOTE(review): the masks are consumed outside this section; presumably they
// select which reference frames to skip -- confirm at the point of use.
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

// Without CONFIG_EXT_REFS only LAST, GOLDEN, ALTREF and INTRA exist; each mask
// again covers every frame type except the one it is named after.
#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
Jingning Han's avatar
Jingning Han committed
96

97 98
#endif  // CONFIG_EXT_REFS

99
// Bit mask over reference-frame indices: ALTREF_FRAME (plus BWDREF_FRAME when
// CONFIG_EXT_REFS is on) together with bit 0.
// NOTE(review): bit 0 presumably corresponds to INTRA_FRAME -- confirm against
// the MV_REFERENCE_FRAME enum.
#if CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#else
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
104

105 106
// Search tuning constants.
// NOTE(review): both are used outside this section; confirm their exact
// meaning at the point of use before changing the values.
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
107

108 109 110 111 112
// Tuning constants that only exist when CONFIG_EXT_INTRA is enabled.
// NOTE(review): their uses are outside this section -- confirm semantics there.
#if CONFIG_EXT_INTRA
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

113 114
// Linear classifier coefficients used by adst_vs_flipadst(). Entries 0..2 are
// the weights applied to the three vertical energy shares and entry 3 is the
// constant term; entries 4..7 are the same for the horizontal direction.
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
115

Jingning Han's avatar
Jingning Han committed
116 117 118 119 120
// One entry of av1_mode_order: a prediction mode together with the pair of
// reference frames it is evaluated with.
typedef struct {
  PREDICTION_MODE mode;
  // ref_frame[1] is NONE for the single-reference and pure intra entries of
  // av1_mode_order.
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

121
// One entry of av1_ref_order: a pair of reference frames (second is NONE for
// single-reference entries).
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
122 123

struct rdcost_block_args {
Yaowu Xu's avatar
Yaowu Xu committed
124
  const AV1_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
125
  MACROBLOCK *x;
126 127
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
128
  RD_STATS rd_stats;
Jingning Han's avatar
Jingning Han committed
129 130 131 132
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
133
  const SCAN_ORDER *scan_order;
Jingning Han's avatar
Jingning Han committed
134 135 136
};

// With CONFIG_EXT_REFS disabled, index 6 of av1_mode_order is the last
// single-reference NEWMV entry ({ NEWMV, { GOLDEN_FRAME, NONE } }).
// NOTE(review): with CONFIG_EXT_REFS enabled the table gains entries before
// that position, so 6 no longer lands on that entry -- confirm intent.
#define LAST_NEW_MV_INDEX 6
Yaowu Xu's avatar
Yaowu Xu committed
137
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
138
  { NEARESTMV, { LAST_FRAME, NONE } },
139
#if CONFIG_EXT_REFS
140 141 142
  { NEARESTMV, { LAST2_FRAME, NONE } },
  { NEARESTMV, { LAST3_FRAME, NONE } },
  { NEARESTMV, { BWDREF_FRAME, NONE } },
143
#endif  // CONFIG_EXT_REFS
144 145
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
146

147
  { DC_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
148

149
  { NEWMV, { LAST_FRAME, NONE } },
150
#if CONFIG_EXT_REFS
151 152 153
  { NEWMV, { LAST2_FRAME, NONE } },
  { NEWMV, { LAST3_FRAME, NONE } },
  { NEWMV, { BWDREF_FRAME, NONE } },
154
#endif  // CONFIG_EXT_REFS
155 156
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
157

158
  { NEARMV, { LAST_FRAME, NONE } },
159
#if CONFIG_EXT_REFS
160 161 162
  { NEARMV, { LAST2_FRAME, NONE } },
  { NEARMV, { LAST3_FRAME, NONE } },
  { NEARMV, { BWDREF_FRAME, NONE } },
163
#endif  // CONFIG_EXT_REFS
164 165
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
166

Yue Chen's avatar
Yue Chen committed
167
#if CONFIG_EXT_INTER
168
  { NEWFROMNEARMV, { LAST_FRAME, NONE } },
169
#if CONFIG_EXT_REFS
170 171 172
  { NEWFROMNEARMV, { LAST2_FRAME, NONE } },
  { NEWFROMNEARMV, { LAST3_FRAME, NONE } },
  { NEWFROMNEARMV, { BWDREF_FRAME, NONE } },
173
#endif  // CONFIG_EXT_REFS
174 175
  { NEWFROMNEARMV, { ALTREF_FRAME, NONE } },
  { NEWFROMNEARMV, { GOLDEN_FRAME, NONE } },
Yue Chen's avatar
Yue Chen committed
176 177
#endif  // CONFIG_EXT_INTER

178
  { ZEROMV, { LAST_FRAME, NONE } },
179
#if CONFIG_EXT_REFS
180 181 182
  { ZEROMV, { LAST2_FRAME, NONE } },
  { ZEROMV, { LAST3_FRAME, NONE } },
  { ZEROMV, { BWDREF_FRAME, NONE } },
183
#endif  // CONFIG_EXT_REFS
184 185
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
186

187
// TODO(zoeliu): May need to reconsider the order on the modes to check
188

189
#if CONFIG_EXT_INTER
190
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
191
#if CONFIG_EXT_REFS
192 193
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
194
#endif  // CONFIG_EXT_REFS
195
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
196
#if CONFIG_EXT_REFS
197 198 199 200
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
201
#endif  // CONFIG_EXT_REFS
202

203
#else  // CONFIG_EXT_INTER
204

205
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
206
#if CONFIG_EXT_REFS
207 208
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
209
#endif  // CONFIG_EXT_REFS
210
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
211
#if CONFIG_EXT_REFS
212 213 214 215
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
216
#endif  // CONFIG_EXT_REFS
217
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
218

219
  { TM_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
220

221 222 223 224
#if CONFIG_ALT_INTRA
  { SMOOTH_PRED, { INTRA_FRAME, NONE } },
#endif  // CONFIG_ALT_INTRA

225
#if CONFIG_EXT_INTER
226 227 228 229 230 231 232 233 234
  { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
235

236
#if CONFIG_EXT_REFS
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
  { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
256
#endif  // CONFIG_EXT_REFS
257

258 259 260 261 262 263 264 265 266
  { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
267 268

#if CONFIG_EXT_REFS
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307
  { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
308
#endif  // CONFIG_EXT_REFS
309 310 311

#else  // CONFIG_EXT_INTER

312 313
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
314
#if CONFIG_EXT_REFS
315 316 317 318
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
319
#endif  // CONFIG_EXT_REFS
320 321
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
322 323

#if CONFIG_EXT_REFS
324 325 326 327 328 329 330 331
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
332
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
333

334
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
335
#if CONFIG_EXT_REFS
336 337
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
338
#endif  // CONFIG_EXT_REFS
339
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
340 341

#if CONFIG_EXT_REFS
342 343 344 345
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
346
#endif  // CONFIG_EXT_REFS
347

348
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
349

350 351 352 353 354 355 356 357
  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
358 359

#if CONFIG_EXT_INTER
360 361 362 363
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
364 365

#if CONFIG_EXT_REFS
366 367 368 369 370 371 372 373 374
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
375 376
#endif  // CONFIG_EXT_REFS

377 378 379 380
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
381

382
#if CONFIG_EXT_REFS
383 384 385 386
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
387 388
#endif  // CONFIG_EXT_REFS

389 390 391 392
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
393
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
394 395
};

Yaowu Xu's avatar
Yaowu Xu committed
396
static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
397
  { { LAST_FRAME, NONE } },
398
#if CONFIG_EXT_REFS
399 400
  { { LAST2_FRAME, NONE } },          { { LAST3_FRAME, NONE } },
  { { BWDREF_FRAME, NONE } },
401
#endif  // CONFIG_EXT_REFS
402
  { { GOLDEN_FRAME, NONE } },         { { ALTREF_FRAME, NONE } },
403

404
  { { LAST_FRAME, ALTREF_FRAME } },
405
#if CONFIG_EXT_REFS
406
  { { LAST2_FRAME, ALTREF_FRAME } },  { { LAST3_FRAME, ALTREF_FRAME } },
407
#endif  // CONFIG_EXT_REFS
408
  { { GOLDEN_FRAME, ALTREF_FRAME } },
409 410

#if CONFIG_EXT_REFS
411 412
  { { LAST_FRAME, BWDREF_FRAME } },   { { LAST2_FRAME, BWDREF_FRAME } },
  { { LAST3_FRAME, BWDREF_FRAME } },  { { GOLDEN_FRAME, BWDREF_FRAME } },
413
#endif  // CONFIG_EXT_REFS
414

415
  { { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
416 417
};

418
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
419 420
// Returns the cost of coding value v out of n possible values with a code
// that spends (l - 1) bits on the first m = (1 << l) - n values and l bits on
// the rest, where l = get_unsigned_bits(n). Every bit is priced as
// av1_cost_bit(128, 0). Returns 0 when l is 0.
static INLINE int write_uniform_cost(int n, int v) {
  const int l = get_unsigned_bits(n);
  const int m = (1 << l) - n;

  if (l == 0) return 0;
  // Short codewords for v < m, long codewords otherwise.
  if (v < m) return (l - 1) * av1_cost_bit(128, 0);
  return l * av1_cost_bit(128, 0);
}
427
#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
428

429 430 431
// Constants for the prune 1 and prune 2 decision boundaries. A 1-D transform
// class is pruned only when the classifier output lies more than *_MARGIN away
// from *_MID (see adst_vs_flipadst() and dct_vs_idtx()).
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
436
  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
437
#if CONFIG_EXT_TX
438 439
  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
440 441 442 443
#endif  // CONFIG_EXT_TX
};

static const TX_TYPE_1D htx_tab[TX_TYPES] = {
444
  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
445
#if CONFIG_EXT_TX
446 447
  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
448 449
#endif  // CONFIG_EXT_TX
};
450

Yaowu Xu's avatar
Yaowu Xu committed
451
// Measures how the squared prediction error (src - dst) is spread over a
// 4x4 grid of cells covering the block, stored as esq[col + 4 * row].
//
// On return hordist[k] (k = 0..2) holds the fraction of the total error found
// in grid column k and verdist[k] the fraction found in grid row k. Only the
// first three entries of each output are written. When the total error is
// zero, every written entry is set to 0.25.
//
// Blocks below BLOCK_16X16 accumulate the error pixel by pixel; larger blocks
// call cpi->fn_ptr[bsize - BLOCK_16X16].vf once per grid cell and use the SSE
// it reports.
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                                         uint8_t *src, int src_stride,
                                         uint8_t *dst, int dst_stride,
                                         double *hordist, double *verdist) {
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int f_index = bsize - BLOCK_16X16;
  unsigned int esq[16] = { 0 };
  double total = 0;
  int i, j;

  if (f_index < 0) {
    const int w_shift = bw == 8 ? 1 : 2;
    const int h_shift = bh == 8 ? 1 : 2;
#if CONFIG_AOM_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          const int d = src16[j + i * src_stride] - dst16[j + i * dst_stride];
          esq[index] += d * d;
        }
      }
    } else {
#endif  // CONFIG_AOM_HIGHBITDEPTH
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          const int d = src[j + i * src_stride] - dst[j + i * dst_stride];
          esq[index] += d * d;
        }
      }
#if CONFIG_AOM_HIGHBITDEPTH
    }
#endif  // CONFIG_AOM_HIGHBITDEPTH
  } else {
    // One variance call per grid cell; only the SSE output is needed, the
    // returned variance is ignored.
    for (i = 0; i < 4; ++i) {
      uint8_t *const src_row = src + i * (bh / 4) * src_stride;
      uint8_t *const dst_row = dst + i * (bh / 4) * dst_stride;
      for (j = 0; j < 4; ++j) {
        const int x_off = j * bw / 4;
        cpi->fn_ptr[f_index].vf(src_row + x_off, src_stride, dst_row + x_off,
                                dst_stride, &esq[4 * i + j]);
      }
    }
  }

  // Accumulate in double: summing the sixteen unsigned values in unsigned
  // arithmetic can wrap modulo 2^32, which would make the denominator
  // inconsistent with the numerators below.
  for (i = 0; i < 16; ++i) total += (double)esq[i];

  if (total > 0) {
    const double e_recip = 1.0 / total;
    for (i = 0; i < 3; ++i) {
      // Grid column i: cells i, i + 4, i + 8, i + 12.
      hordist[i] = ((double)esq[i] + (double)esq[i + 4] + (double)esq[i + 8] +
                    (double)esq[i + 12]) *
                   e_recip;
      // Grid row i: cells 4 * i .. 4 * i + 3.
      verdist[i] = ((double)esq[4 * i] + (double)esq[4 * i + 1] +
                    (double)esq[4 * i + 2] + (double)esq[4 * i + 3]) *
                   e_recip;
    }
  } else {
    for (i = 0; i < 3; ++i) hordist[i] = verdist[i] = 0.25;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
576 577 578
// Decides whether ADST or FLIPADST can be pruned in each direction.
//
// Fills hdist/vdist through get_energy_distribution_fine(), projects each onto
// the matching half of ADST_FLIP_SVM, and compares the projection with
// FAST_EXT_TX_EDST_MID +/- FAST_EXT_TX_EDST_MARGIN.
//
// Returns a bitmask: bit FLIPADST_1D or ADST_1D for the vertical decision, and
// the same bits shifted left by 8 for the horizontal decision. A direction
// whose projection falls inside the margin contributes no bit.
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize, uint8_t *src,
                            int src_stride, uint8_t *dst, int dst_stride,
                            double *hdist, double *vdist) {
  const double upper = FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN;
  const double lower = FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN;
  double svm_proj_h, svm_proj_v;
  int prune_bitmask = 0;

  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);

  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];

  if (svm_proj_v > upper)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < lower)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > upper)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < lower)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
602
static void get_horver_correlation(int16_t *diff, int stride, int w, int h,
603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
                                   double *hcorr, double *vcorr) {
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
631 632 633
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
634 635 636 637 638 639 640 641 642 643 644 645
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

646 647
int dct_vs_idtx(int16_t *diff, int stride, int w, int h, double *hcorr,
                double *vcorr) {
648 649 650 651
  int prune_bitmask = 0;
  get_horver_correlation(diff, stride, w, h, hcorr, vcorr);

  if (*vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
652
    prune_bitmask |= 1 << IDTX_1D;
653 654 655 656
  else if (*vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << DCT_1D;

  if (*hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
657
    prune_bitmask |= 1 << (IDTX_1D + 8);
658 659 660 661 662 663
  else if (*hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xu's avatar
Yaowu Xu committed
664
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
665
                             MACROBLOCK *x, MACROBLOCKD *xd, int adst_flipadst,
666
                             int dct_idtx) {
667 668 669 670 671
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int bw = 4 << (b_width_log2_lookup[bs]);
  const int bh = 4 << (b_height_log2_lookup[bs]);
672
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
673
  double hcorr, vcorr;
674
  int prune = 0;
Yaowu Xu's avatar
Yaowu Xu committed
675
  av1_subtract_plane(x, bsize, 0);
676

677 678 679
  if (adst_flipadst)
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, hdist, vdist);
680
  if (dct_idtx) prune |= dct_vs_idtx(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
681 682 683

  return prune;
}
684 685
#endif  // CONFIG_EXT_TX

686
// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xu's avatar
Yaowu Xu committed
687
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
688
                             MACROBLOCK *x, MACROBLOCKD *xd) {
689 690
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
691
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
Yaowu Xu's avatar
Yaowu Xu committed
692
  av1_subtract_plane(x, bsize, 0);
693 694
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
                          pd->dst.stride, hdist, vdist);
695 696
}

Yaowu Xu's avatar
Yaowu Xu committed
697
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
698 699 700 701
                          MACROBLOCKD *xd, int tx_set) {
#if CONFIG_EXT_TX
  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
#else
702
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
703 704
#endif

705
  switch (cpi->sf.tx_type_search.prune_mode) {
706 707
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
708
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
709
        return 0;
710 711
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
712 713
#if CONFIG_EXT_TX
    case PRUNE_TWO:
714
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
715
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
716 717
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
718
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
719 720
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
721
      break;
722
#endif
723 724 725 726 727
  }
  assert(0);
  return 0;
}

728
// Returns 1 when tx_type should be searched, 0 when prune rules it out.
// With CONFIG_EXT_TX, a type is ruled out if the bit for its vtx_tab entry is
// set in the low byte of prune, or the bit for its htx_tab entry is set in
// the bits from 8 upwards. Without CONFIG_EXT_TX every type is searched.
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  const int v_pruned = (prune >> vtx_tab[tx_type]) & 1;
  const int h_pruned = (prune >> (htx_tab[tx_type] + 8)) & 1;
  return !(v_pruned | h_pruned);
#else
  // temporary to avoid compiler warnings
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
  return 1;
#endif
}

Yaowu Xu's avatar
Yaowu Xu committed
743
static void model_rd_from_sse(const AV1_COMP *const cpi,
744 745
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
746 747 748
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
Yaowu Xu's avatar
Yaowu Xu committed
749
#if CONFIG_AOM_HIGHBITDEPTH
750
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
Yaowu Xu's avatar
Yaowu Xu committed
751
#endif  // CONFIG_AOM_HIGHBITDEPTH
752
                                                    3;
Geza Lore's avatar
Geza Lore committed
753 754 755 756 757 758 759

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
760
      *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xu's avatar
Yaowu Xu committed
761
                    (16 - AV1_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
762 763 764 765
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
Yaowu Xu's avatar
Yaowu Xu committed
766 767
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
768 769 770 771 772
  }

  *dist <<= 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
773
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
774 775 776 777
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
778 779 780
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
781 782 783
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
784 785 786 787 788 789
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
790 791 792
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
793 794
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

Geza Lore's avatar
Geza Lore committed
795 796 797
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
798

Geza Lore's avatar
Geza Lore committed
799 800
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
801 802
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
803

804
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
805

Geza Lore's avatar
Geza Lore committed
806
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
807

Geza Lore's avatar
Geza Lore committed
808
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
809 810 811

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
812 813
  }

Geza Lore's avatar
Geza Lore committed
814
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
815 816
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
817
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
818 819
}

820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() returns two kinds of error:
// 1) the reconstruction (i.e. decoded) error, and
// 2) the squared sum of the transformed residue (i.e. 'coeff').
// However, if PVQ is enabled, coeff does not hold the transformed residue;
// a transformed original is kept instead.
// Hence, a new parameter, the ref vector (i.e. the transformed predicted
// signal), is required to derive the residue signal,
// i.e. coeff - ref = residue (all transformed).

// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
// PVQ variant of the block error computation.
//
// Returns the squared error between coeff and dqcoeff, and writes the squared
// error between coeff and ref to *ssz. Both are computed over block_size
// entries with av1_block_error_fp().
int64_t av1_block_error2_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           const tran_low_t *ref, intptr_t block_size,
                           int64_t *ssz) {
  // Distortion of the decoded signal, i.e. (orig - decoded)^2, reusing the
  // existing sse code.
  const int64_t recon_error = av1_block_error_fp(coeff, dqcoeff, block_size);

  // Prediction residue energy, i.e. (orig - ref)^2.
  *ssz = av1_block_error_fp(coeff, ref, block_size);

  return recon_error;
}
#endif

Yaowu Xu's avatar
Yaowu Xu committed
847 848
// Computes the squared error between original and dequantized coefficients.
//
// coeff, dqcoeff: arrays of block_size coefficients.
// ssz:            receives the sum of squares of coeff.
// Returns the sum over the block of (coeff[i] - dqcoeff[i])^2.
//
// All products are formed in 64 bits: squaring in int overflows as soon as a
// difference or coefficient reaches 46341 in magnitude, which is signed
// overflow (undefined behaviour) and would corrupt the sums. This matches the
// widening already done in av1_highbd_block_error_c().
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - (int64_t)dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
862 863
// Computes the squared error between two 16-bit coefficient arrays.
//
// coeff, dqcoeff: arrays of block_size entries.
// Returns the sum over the block of (coeff[i] - dqcoeff[i])^2; 0 when
// block_size <= 0.
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    // The difference of two int16_t values fits in int (at most +/-65535),
    // but its square (up to 4294836225) does not fit in a 32-bit int, so the
    // multiplication must be carried out in 64 bits.
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
875 876 877 878
#if CONFIG_AOM_HIGHBITDEPTH
// High-bitdepth block error.
//
// Sums (coeff[i] - dqcoeff[i])^2 and coeff[i]^2 over block_size entries, then
// scales both sums down by 2 * (bd - 8) bits with rounding. The scaled
// coefficient energy is written to *ssz and the scaled error is returned.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  int rounding = 0;
  int64_t err_sum = 0;
  int64_t coeff_energy = 0;
  int i;

  // Half of the divisor, so that the right shift rounds to nearest.
  if (shift > 0) rounding = 1 << (shift - 1);

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    const int64_t c = (int64_t)coeff[i];
    err_sum += diff * diff;
    coeff_energy += c * c;
  }
  assert(err_sum >= 0 && coeff_energy >= 0);

  *ssz = (coeff_energy + rounding) >> shift;
  return (err_sum + rounding) >> shift;
}
Yaowu Xu's avatar
Yaowu Xu committed
896
#endif  // CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
897

898
#if !CONFIG_PVQ
899
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
904 905 906
// Returns the token cost of the quantized coefficients of one transform
// block.
//
// x, plane, block:       select the coefficient buffer and its eob.
// coeff_ctx:             entropy context used for the first (DC) token.
// tx_size:               transform size; selects band lengths and cost table.
// scan, nb:              scan order and neighbour table for that scan.
// use_fast_coef_costing: when non-zero, the context of each AC token is
//                        approximated by "was the previous token zero"
//                        instead of being derived from neighbouring tokens.
// cm is unused.
int av1_cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                    int block, int coeff_ctx, TX_SIZE tx_size,
                    const int16_t *scan, const int16_t *nb,
                    int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  const int tx_size_ctx = txsize_sqr_map[tx_size];
  // Lengths of the successive coefficient bands, read starting at entry 1 of
  // the row for this transform size.
  const uint16_t *next_band_len = &band_count_table[tx_size][1];
  // Cost table of the band currently being coded; advanced band by band.
  unsigned int(*band_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
  uint8_t token_cache[MAX_TX_SQUARE];
#if CONFIG_AOM_HIGHBITDEPTH
  const int *cat6_high_cost = av1_get_high_cost_table(xd->bd);
#else
  const int *cat6_high_cost = av1_get_high_cost_table(8);
#endif
  int ctx = coeff_ctx;
  int band_left;  // coefficients remaining in the current band
  int prev_zero;  // 1 if the previously coded token was token 0
  int16_t tok;
  int dc_level;
  int pos, cost;

#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
  (void)cm;

  // Empty block: a single eob token.
  if (eob == 0) return band_costs[0][0][ctx][EOB_TOKEN];

  band_left = *next_band_len++;

  // dc token (handled identically by both costing modes)
  dc_level = qcoeff[0];
  cost = av1_get_token_cost(dc_level, &tok, cat6_high_cost);
  cost += (*band_costs)[0][ctx][tok];
  token_cache[0] = av1_pt_energy_class[tok];
  ++band_costs;
  prev_zero = !tok;

  if (use_fast_coef_costing) {
    // ac tokens
    for (pos = 1; pos < eob; ++pos) {
      const int level = qcoeff[scan[pos]];

      cost += av1_get_token_cost(level, &tok, cat6_high_cost);
      cost += (*band_costs)[prev_zero][prev_zero][tok];
      prev_zero = !tok;
      if (--band_left == 0) {
        band_left = *next_band_len++;
        ++band_costs;
      }
    }

    // eob token; skipped when the band length just loaded was 0.
    if (band_left) cost += (*band_costs)[0][prev_zero][EOB_TOKEN];
  } else {  // !use_fast_coef_costing
    // ac tokens
    for (pos = 1; pos < eob; ++pos) {
      const int rc = scan[pos];
      const int level = qcoeff[rc];

      cost += av1_get_token_cost(level, &tok, cat6_high_cost);
      ctx = get_coef_context(nb, token_cache, pos);
      cost += (*band_costs)[prev_zero][ctx][tok];
      token_cache[rc] = av1_pt_energy_class[tok];
      prev_zero = !tok;
      if (--band_left == 0) {
        band_left = *next_band_len++;
        ++band_costs;
      }
    }

    // eob token; skipped when the band length just loaded was 0.
    if (band_left) {
      ctx = get_coef_context(nb, token_cache, pos);
      cost += (*band_costs)[0][ctx][EOB_TOKEN];
    }
  }

  return cost;
}
1010
#endif
Jingning Han's avatar
Jingning Han committed
1011

Yaowu Xu's avatar
Yaowu Xu committed
1012 1013
static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
                       int blk_row, int blk_col, TX_SIZE tx_size,
Jingning Han's avatar
Jingning Han committed
1014
                       int64_t *out_dist, int64_t *out_sse) {
1015
  MACROBLOCKD *const xd = &x->e_mbd;
Alex Converse's avatar
Alex Converse committed
1016 1017
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1018 1019 1020
  if (cpi->sf.use_transform_domain_distortion) {
    // Transform domain distortion computation is more efficient as it does
    // not involve an inverse transform, but it is less accurate.
Jingning Han's avatar
Jingning Han committed
1021
    const int buffer_length = tx_size_2d[tx_size];
1022
    int64_t this_sse;
1023
    int shift = (MAX_TX_SCALE - get_tx_scale(tx_size)) * 2;
1024 1025
    tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1026
#if CONFIG_PVQ
1027
    tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
1028
#endif
Yaowu Xu's avatar
Yaowu Xu committed
1029
#if CONFIG_AOM_HIGHBITDEPTH
1030
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1031
    *out_dist =
Jingning Han's avatar
Jingning Han committed
1032
        av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
1033
        shift;
1034 1035 1036 1037
#elif CONFIG_PVQ
    *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
                                   &this_sse) >>
                shift;
Jingning Han's avatar
Jingning Han committed
1038 1039 1040
#else
    *out_dist =
        av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
Yaowu Xu's avatar
Yaowu Xu committed
1041
#endif  // CONFIG_AOM_HIGHBITDEPTH
1042 1043 1044
    *out_sse = this_sse >> shift;
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
Jingning Han's avatar
Jingning Han committed
1045 1046
    const int bsw = block_size_wide[tx_bsize];
    const int bsh = block_size_high[tx_bsize];
1047 1048
    const int src_stride = x->plane[plane].src.stride;
    const int dst_stride = xd->plane[plane].dst.stride;
Jingning Han's avatar
Jingning Han committed
1049 1050 1051 1052 1053
    // Scale the transform block index to pixel unit.
    const int src_idx = (blk_row * src_stride + blk_col)
                        << tx_size_wide_log2[0];
    const int dst_idx = (blk_row * dst_stride + blk_col)
                        << tx_size_wide_log2[0];
1054 1055 1056
    const uint8_t *src = &x->plane[plane].src.buf[src_idx];
    const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Alex Converse's avatar
Alex Converse committed
1057
    const uint16_t eob = p->eobs[block];
1058 1059 1060 1061

    unsigned int tmp;

    assert(cpi != NULL);
Jingning Han's avatar
Jingning Han committed
1062
    assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
1063 1064 1065 1066

    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
    *out_sse = (int64_t)tmp * 16;

Alex Converse's avatar
Alex Converse committed
1067
    if (eob) {
1068
      const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
1069
#if CONFIG_AOM_HIGHBITDEPTH
1070
      DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
1071
      uint8_t *recon = (uint8_t *)recon16;