/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp10_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "aom_dsp/vpx_dsp_common.h"
#include "aom_dsp/blend.h"
#include "aom_mem/vpx_mem.h"
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"

#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"

#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/palette.h"
#include "av1/encoder/quantize.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/aq_variance.h"

#if CONFIG_DUAL_FILTER
// Table of filter-index pairs. Each row holds two filter indices and the
// table enumerates every ordered pair once: 5 x 5 pairs when
// CONFIG_EXT_INTERP is enabled, 3 x 3 pairs otherwise.
// NOTE(review): which element of a pair maps to which filtering direction is
// not visible here - confirm against the users of this table.
#if CONFIG_EXT_INTERP
static const int filter_sets[25][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 0, 4 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 1, 4 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 2, 4 }, { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }, { 3, 4 }, { 4, 0 },
  { 4, 1 }, { 4, 2 }, { 4, 3 }, { 4, 4 },
};
#else
static const int filter_sets[9][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
};
#endif
#endif

// Per-reference-frame bit masks. Each <REF>_FRAME_MODE_MASK has one bit set
// (1 << frame) for every reference frame, including INTRA_FRAME, other than
// the frame the mask is named after.
#if CONFIG_EXT_REFS

#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

#endif  // CONFIG_EXT_REFS

// Bits for the frames listed below plus bit 0 (0x01).
// NOTE(review): presumably the set of frames allowed as a second (compound)
// reference - confirm against the users of this mask.
#if CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#else
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#endif  // CONFIG_EXT_REFS

#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8

#if CONFIG_EXT_INTRA
#define ANGLE_FAST_SEARCH 1
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

// Linear decision weights consumed by adst_vs_flipadst(): for each direction,
// three coefficients applied to the energy-distribution values followed by a
// constant bias term.
const double ADST_FLIP_SVM[8] = {
  -6.6623, -2.8062, -3.2531, 3.1671,  // vert
  -7.7051, -3.2234, -3.6193, 3.4533,  // horz
};
114

Jingning Han's avatar
Jingning Han committed
115 116 117 118 119
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

120
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
121 122

struct rdcost_block_args {
Jingning Han's avatar
Jingning Han committed
123
  const VP10_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
124
  MACROBLOCK *x;
125 126
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
Jingning Han's avatar
Jingning Han committed
127 128 129 130 131 132 133 134 135 136 137 138 139
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;
};

#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION vp10_mode_order[MAX_MODES] = {
140
  { NEARESTMV, { LAST_FRAME, NONE } },
141
#if CONFIG_EXT_REFS
142 143 144
  { NEARESTMV, { LAST2_FRAME, NONE } },
  { NEARESTMV, { LAST3_FRAME, NONE } },
  { NEARESTMV, { BWDREF_FRAME, NONE } },
145
#endif  // CONFIG_EXT_REFS
146 147
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
148

149
  { DC_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
150

151
  { NEWMV, { LAST_FRAME, NONE } },
152
#if CONFIG_EXT_REFS
153 154 155
  { NEWMV, { LAST2_FRAME, NONE } },
  { NEWMV, { LAST3_FRAME, NONE } },
  { NEWMV, { BWDREF_FRAME, NONE } },
156
#endif  // CONFIG_EXT_REFS
157 158
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
159

160
  { NEARMV, { LAST_FRAME, NONE } },
161
#if CONFIG_EXT_REFS
162 163 164
  { NEARMV, { LAST2_FRAME, NONE } },
  { NEARMV, { LAST3_FRAME, NONE } },
  { NEARMV, { BWDREF_FRAME, NONE } },
165
#endif  // CONFIG_EXT_REFS
166 167
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
168

Yue Chen's avatar
Yue Chen committed
169
#if CONFIG_EXT_INTER
170
  { NEWFROMNEARMV, { LAST_FRAME, NONE } },
171
#if CONFIG_EXT_REFS
172 173 174
  { NEWFROMNEARMV, { LAST2_FRAME, NONE } },
  { NEWFROMNEARMV, { LAST3_FRAME, NONE } },
  { NEWFROMNEARMV, { BWDREF_FRAME, NONE } },
175
#endif  // CONFIG_EXT_REFS
176 177
  { NEWFROMNEARMV, { ALTREF_FRAME, NONE } },
  { NEWFROMNEARMV, { GOLDEN_FRAME, NONE } },
Yue Chen's avatar
Yue Chen committed
178 179
#endif  // CONFIG_EXT_INTER

180
  { ZEROMV, { LAST_FRAME, NONE } },
181
#if CONFIG_EXT_REFS
182 183 184
  { ZEROMV, { LAST2_FRAME, NONE } },
  { ZEROMV, { LAST3_FRAME, NONE } },
  { ZEROMV, { BWDREF_FRAME, NONE } },
185
#endif  // CONFIG_EXT_REFS
186 187
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
188

189
// TODO(zoeliu): May need to reconsider the order on the modes to check
190

191
#if CONFIG_EXT_INTER
192
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
193
#if CONFIG_EXT_REFS
194 195
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
196
#endif  // CONFIG_EXT_REFS
197
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
198
#if CONFIG_EXT_REFS
199 200 201 202
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
203
#endif  // CONFIG_EXT_REFS
204

205
#else  // CONFIG_EXT_INTER
206

207
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
208
#if CONFIG_EXT_REFS
209 210
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
211
#endif  // CONFIG_EXT_REFS
212
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
213
#if CONFIG_EXT_REFS
214 215 216 217
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
218
#endif  // CONFIG_EXT_REFS
219
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
220

221
  { TM_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
222

223
#if CONFIG_EXT_INTER
224 225 226 227 228 229 230 231 232
  { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
233

234
#if CONFIG_EXT_REFS
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
  { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
254
#endif  // CONFIG_EXT_REFS
255

256 257 258 259 260 261 262 263 264
  { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
265 266

#if CONFIG_EXT_REFS
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
  { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
306
#endif  // CONFIG_EXT_REFS
307 308 309

#else  // CONFIG_EXT_INTER

310 311
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
312
#if CONFIG_EXT_REFS
313 314 315 316
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
317
#endif  // CONFIG_EXT_REFS
318 319
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
320 321

#if CONFIG_EXT_REFS
322 323 324 325 326 327 328 329
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
330
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
331

332
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
333
#if CONFIG_EXT_REFS
334 335
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
336
#endif  // CONFIG_EXT_REFS
337
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
338 339

#if CONFIG_EXT_REFS
340 341 342 343
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
344
#endif  // CONFIG_EXT_REFS
345

346
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
347

348 349 350 351 352 353 354 355
  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
356 357

#if CONFIG_EXT_INTER
358 359 360 361
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
362 363

#if CONFIG_EXT_REFS
364 365 366 367 368 369 370 371 372
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
373 374
#endif  // CONFIG_EXT_REFS

375 376 377 378
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
379

380
#if CONFIG_EXT_REFS
381 382 383 384
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
385 386
#endif  // CONFIG_EXT_REFS

387 388 389 390
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
391
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
392 393 394
};

static const REF_DEFINITION vp10_ref_order[MAX_REFS] = {
395
  { { LAST_FRAME, NONE } },
396
#if CONFIG_EXT_REFS
397 398
  { { LAST2_FRAME, NONE } },          { { LAST3_FRAME, NONE } },
  { { BWDREF_FRAME, NONE } },
399
#endif  // CONFIG_EXT_REFS
400
  { { GOLDEN_FRAME, NONE } },         { { ALTREF_FRAME, NONE } },
401

402
  { { LAST_FRAME, ALTREF_FRAME } },
403
#if CONFIG_EXT_REFS
404
  { { LAST2_FRAME, ALTREF_FRAME } },  { { LAST3_FRAME, ALTREF_FRAME } },
405
#endif  // CONFIG_EXT_REFS
406
  { { GOLDEN_FRAME, ALTREF_FRAME } },
407 408

#if CONFIG_EXT_REFS
409 410
  { { LAST_FRAME, BWDREF_FRAME } },   { { LAST2_FRAME, BWDREF_FRAME } },
  { { LAST3_FRAME, BWDREF_FRAME } },  { { GOLDEN_FRAME, BWDREF_FRAME } },
411
#endif  // CONFIG_EXT_REFS
412

413
  { { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
414 415
};

hui su's avatar
hui su committed
416 417
// Returns the cost of signalling value v out of n possibilities with a
// near-uniform code. With l = get_unsigned_bits(n) and m = (1 << l) - n, the
// values below m are charged l - 1 bits and the remaining values l bits; each
// bit is priced as vp10_cost_bit(128, 0). The cost is 0 when l is 0.
static INLINE int write_uniform_cost(int n, int v) {
  const int l = get_unsigned_bits(n);
  const int m = (1 << l) - n;
  if (l == 0) return 0;
  if (v < m)
    return (l - 1) * vp10_cost_bit(128, 0);
  else
    return l * vp10_cost_bit(128, 0);
}

// constants for prune 1 and prune 2 decision boundaries
// A statistic above MID + MARGIN or below MID - MARGIN triggers pruning;
// values inside that band prune nothing. The CORR pair applies to the
// correlation test (dct_vs_idtx), the EDST pair to the energy-distribution
// test (adst_vs_flipadst).
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
432
  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
433
#if CONFIG_EXT_TX
434 435
  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
436 437 438 439
#endif  // CONFIG_EXT_TX
};

static const TX_TYPE_1D htx_tab[TX_TYPES] = {
440
  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
441
#if CONFIG_EXT_TX
442 443
  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
444 445
#endif  // CONFIG_EXT_TX
};
446

447
// Measures how the squared src/dst difference is distributed over the block.
//
// The block is divided into a 4x4 grid of cells and the sum of squared
// differences of each cell is accumulated in esq[row * 4 + col]. On return
//   hordist[c] (c = 0..2) is the share of the total energy in grid column c,
//   verdist[r] (r = 0..2) is the share of the total energy in grid row r.
// The fourth column/row share is implied (the shares sum to 1) and is not
// written. When the total energy is zero every written share is 0.25.
//
// Blocks smaller than BLOCK_16X16 are accumulated pixel by pixel; larger
// blocks use the variance function registered for index
// (bsize - BLOCK_16X16), called once per grid cell, and keep only its SSE
// output.
static void get_energy_distribution_fine(const VP10_COMP *cpi, BLOCK_SIZE bsize,
                                         uint8_t *src, int src_stride,
                                         uint8_t *dst, int dst_stride,
                                         double *hordist, double *verdist) {
  const int bw = 4 << (b_width_log2_lookup[bsize]);
  const int bh = 4 << (b_height_log2_lookup[bsize]);
  unsigned int esq[16] = { 0 };
  double total = 0;
  int i, j;

  const int f_index = bsize - BLOCK_16X16;
  if (f_index < 0) {
    // Shifts that map a pixel coordinate to its grid cell coordinate.
    const int w_shift = bw == 8 ? 1 : 2;
    const int h_shift = bh == 8 ? 1 : 2;
#if CONFIG_VP9_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (i = 0; i < bh; ++i)
        for (j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          const int d = src16[j + i * src_stride] - dst16[j + i * dst_stride];
          esq[index] += d * d;
        }
    } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH

      for (i = 0; i < bh; ++i)
        for (j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          const int d = src[j + i * src_stride] - dst[j + i * dst_stride];
          esq[index] += d * d;
        }
#if CONFIG_VP9_HIGHBITDEPTH
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else {
    // bw and bh are multiples of 4, so the cell offsets below are exact.
    const int cell_w = bw / 4;
    const int cell_h = bh / 4;
    for (i = 0; i < 4; ++i) {
      uint8_t *const src_row = src + i * cell_h * src_stride;
      uint8_t *const dst_row = dst + i * cell_h * dst_stride;
      for (j = 0; j < 4; ++j) {
        // Only the SSE written through the last argument is needed; the
        // returned variance is intentionally discarded.
        (void)cpi->fn_ptr[f_index].vf(src_row + j * cell_w, src_stride,
                                      dst_row + j * cell_w, dst_stride,
                                      &esq[i * 4 + j]);
      }
    }
  }

  // Accumulate in double so that the sum of sixteen 32-bit SSE values cannot
  // wrap around.
  for (i = 0; i < 16; ++i) total += (double)esq[i];

  if (total > 0) {
    const double e_recip = 1.0 / total;
    for (i = 0; i < 3; ++i) {
      hordist[i] = ((double)esq[i] + (double)esq[4 + i] + (double)esq[8 + i] +
                    (double)esq[12 + i]) *
                   e_recip;
      verdist[i] = ((double)esq[4 * i] + (double)esq[4 * i + 1] +
                    (double)esq[4 * i + 2] + (double)esq[4 * i + 3]) *
                   e_recip;
    }
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
}

// Decides, per direction, whether ADST or FLIPADST can be pruned.
//
// Fills hdist/vdist through get_energy_distribution_fine() and projects each
// onto the matching half of ADST_FLIP_SVM (three weights plus a bias). A
// projection above FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN prunes
// FLIPADST_1D, one below FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN
// prunes ADST_1D, and anything in between prunes neither.
//
// Returns a bitmask: bit (1 << type) marks a pruned vertical 1-D type and
// bit (1 << (type + 8)) a pruned horizontal 1-D type.
static int adst_vs_flipadst(const VP10_COMP *cpi, BLOCK_SIZE bsize,
                            uint8_t *src, int src_stride, uint8_t *dst,
                            int dst_stride, double *hdist, double *vdist) {
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);

  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
598
static void get_horver_correlation(int16_t *diff, int stride, int w, int h,
599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626
                                   double *hcorr, double *vcorr) {
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
627 628 629
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
630 631 632 633 634 635 636 637 638 639 640 641
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

642 643
int dct_vs_idtx(int16_t *diff, int stride, int w, int h, double *hcorr,
                double *vcorr) {
644 645 646 647
  int prune_bitmask = 0;
  get_horver_correlation(diff, stride, w, h, hcorr, vcorr);

  if (*vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
648
    prune_bitmask |= 1 << IDTX_1D;
649 650 651 652
  else if (*vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << DCT_1D;

  if (*hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
653
    prune_bitmask |= 1 << (IDTX_1D + 8);
654 655 656 657 658 659
  else if (*hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
660 661
static int prune_two_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
                             MACROBLOCK *x, MACROBLOCKD *xd, int adst_flipadst,
662
                             int dct_idtx) {
663 664 665 666 667
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int bw = 4 << (b_width_log2_lookup[bs]);
  const int bh = 4 << (b_height_log2_lookup[bs]);
668
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
669
  double hcorr, vcorr;
670
  int prune = 0;
671
  vp10_subtract_plane(x, bsize, 0);
672

673 674 675
  if (adst_flipadst)
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, hdist, vdist);
676
  if (dct_idtx) prune |= dct_vs_idtx(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
677 678 679

  return prune;
}
680 681
#endif  // CONFIG_EXT_TX

682
// Performance drop: 0.3%, Speed improvement: 5%
683 684
static int prune_one_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
                             MACROBLOCK *x, MACROBLOCKD *xd) {
685 686
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
687
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
688 689 690
  vp10_subtract_plane(x, bsize, 0);
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
                          pd->dst.stride, hdist, vdist);
691 692
}

693
static int prune_tx_types(const VP10_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
694 695 696 697
                          MACROBLOCKD *xd, int tx_set) {
#if CONFIG_EXT_TX
  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
#else
698
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
699 700
#endif

701
  switch (cpi->sf.tx_type_search.prune_mode) {
702 703
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
704 705
      if ((tx_set >= 0) & !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
        return 0;
706 707
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
708 709
#if CONFIG_EXT_TX
    case PRUNE_TWO:
710
      if ((tx_set >= 0) & !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
711
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
712 713 714 715 716
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
      if ((tx_set >= 0) & !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
717
      break;
718
#endif
719 720 721 722 723
  }
  assert(0);
  return 0;
}

724
// Returns non-zero when tx_type should still be searched given the prune
// bitmask. With CONFIG_EXT_TX the type is rejected if either its vertical
// 1-D component (bit vtx_tab[tx_type]) or its horizontal 1-D component
// (bit htx_tab[tx_type] + 8) is set in prune; without CONFIG_EXT_TX every
// type is searched.
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
#else
  // temporary to avoid compiler warnings
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
  return 1;
#endif
}

static void model_rd_from_sse(const VP10_COMP *const cpi,
740 741
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
742 743 744 745
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
746
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
Geza Lore's avatar
Geza Lore committed
747
#endif  // CONFIG_VP9_HIGHBITDEPTH
748
                                                    3;
Geza Lore's avatar
Geza Lore committed
749 750 751 752 753 754 755

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
756
      *rate = (int)((square_error * (280 - quantizer)) >>
757
                    (16 - VP10_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
758 759 760 761 762
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
    vp10_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
763
                                  pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
764 765 766 767 768 769
  }

  *dist <<= 4;
}

static void model_rd_for_sb(const VP10_COMP *const cpi, BLOCK_SIZE bsize,
770 771 772 773
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
774 775 776
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
777 778 779
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
780 781 782 783 784 785
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
786 787 788
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
789 790
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

Geza Lore's avatar
Geza Lore committed
791 792 793
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
794

Geza Lore's avatar
Geza Lore committed
795 796
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
797 798
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
799

800
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
801

Geza Lore's avatar
Geza Lore committed
802
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
803

Geza Lore's avatar
Geza Lore committed
804
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
805 806 807

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
808 809
  }

Geza Lore's avatar
Geza Lore committed
810
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
811 812
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
813
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
814 815 816
}

// Computes the squared error between original and dequantized coefficients.
//
//   coeff       original transform coefficients.
//   dqcoeff     dequantized coefficients.
//   block_size  number of coefficients to process.
//   ssz         out: sum of squares of the original coefficients.
//
// Returns the sum of squared differences (coeff[i] - dqcoeff[i])^2.
//
// All products are formed in 64 bits: squaring in plain `int` overflows
// (undefined behaviour) once a difference or coefficient exceeds 46340 in
// magnitude.
int64_t vp10_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           intptr_t block_size, int64_t *ssz) {
  intptr_t i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}

// Computes the sum of squared differences between 16-bit original and
// dequantized coefficients.
//
//   coeff       original transform coefficients.
//   dqcoeff     dequantized coefficients.
//   block_size  number of coefficients to process; <= 0 yields 0.
//
// Returns sum over i of (coeff[i] - dqcoeff[i])^2.
//
// The difference of two int16_t values can reach +/-65535, whose square does
// not fit in a 32-bit int, so the product is formed in 64 bits.
int64_t vp10_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                              int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
}

#if CONFIG_VP9_HIGHBITDEPTH
// High bit depth variant of the coefficient block error.
//
//   coeff       original transform coefficients.
//   dqcoeff     dequantized coefficients.
//   block_size  number of coefficients to process.
//   ssz         out: sum of squared original coefficients, after scaling.
//   bd          bit depth; both sums are rounded and shifted right by
//               2 * (bd - 8).
//
// Returns the scaled sum of squared differences.
int64_t vp10_highbd_block_error_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  intptr_t block_size, int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t err_acc = 0;
  int64_t energy_acc = 0;
  int idx;

  for (idx = 0; idx < block_size; ++idx) {
    const int64_t delta = coeff[idx] - dqcoeff[idx];
    const int64_t value = coeff[idx];
    err_acc += delta * delta;
    energy_acc += value * value;
  }
  assert(err_acc >= 0 && energy_acc >= 0);

  *ssz = (energy_acc + rounding) >> shift;
  return (err_acc + rounding) >> shift;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
872
static int cost_coeffs(MACROBLOCK *x, int plane, int block,
873 874 875
#if CONFIG_VAR_TX
                       int coeff_ctx,
#else
Jingning Han's avatar
Jingning Han committed
876
                       ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
877
#endif
878
                       TX_SIZE tx_size, const int16_t *scan, const int16_t *nb,
Jingning Han's avatar
Jingning Han committed
879 880 881 882 883 884
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
885
  const uint16_t *band_count = &band_count_table[tx_size][1];
Jingning Han's avatar
Jingning Han committed
886 887
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
888
  const int tx_size_ctx = txsize_sqr_map[tx_size];
889 890
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
891
  uint8_t token_cache[MAX_TX_SQUARE];
892 893 894
#if CONFIG_VAR_TX
  int pt = coeff_ctx;
#else
Jingning Han's avatar
Jingning Han committed
895
  int pt = combine_entropy_contexts(*A, *L);
896
#endif
Jingning Han's avatar
Jingning Han committed
897 898
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
899
  const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
Jingning Han's avatar
Jingning Han committed
900
#else
901
  const int *cat6_high_cost = vp10_get_high_cost_table(8);
Jingning Han's avatar
Jingning Han committed
902 903
#endif

904
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
905 906 907
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
908
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
909 910 911 912 913 914

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
Julia Robson's avatar
Julia Robson committed
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = vp10_get_token_cost(v, &prev_t, cat6_high_cost);
      cost += (*token_costs)[0][pt][prev_t];

      token_cache[0] = vp10_pt_energy_class[prev_t];
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += vp10_get_token_cost(v, &t, cat6_high_cost);
        cost += (*token_costs)[!prev_t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
Jingning Han's avatar
Jingning Han committed
940 941
      }

Julia Robson's avatar
Julia Robson committed
942
      // eob token
943
      if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
Julia Robson's avatar
Julia Robson committed
944 945 946 947 948 949 950

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
951
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
Julia Robson's avatar
Julia Robson committed
952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977
      cost = vp10_get_token_cost(v, &tok, cat6_high_cost);
      cost += (*token_costs)[0][pt][tok];

      token_cache[0] = vp10_pt_energy_class[tok];
      ++token_costs;

      tok_cost_ptr = &((*token_costs)[!tok]);

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += vp10_get_token_cost(v, &tok, cat6_high_cost);
        pt = get_coef_context(nb, token_cache, c);
        cost += (*tok_cost_ptr)[pt][tok];
        token_cache[rc] = vp10_pt_energy_class[tok];
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
        tok_cost_ptr = &((*token_costs)[!tok]);
      }

      // eob token
      if (band_left) {
Jingning Han's avatar
Jingning Han committed
978 979 980 981 982 983
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

984
#if !CONFIG_VAR_TX
Jingning Han's avatar
Jingning Han committed
985 986
  // is eob first coefficient;
  *A = *L = (c > 0);
987
#endif
Jingning Han's avatar
Jingning Han committed
988 989 990 991

  return cost;
}

992 993
// Computes the distortion and the source-vs-prediction SSE of one transform
// block.
//
//   cpi               encoder instance (speed features, variance functions).
//   x                 macroblock holding source pixels, coeffs and eobs.
//   plane, block      plane index and block index of the transform block.
//   blk_row, blk_col  block position within the plane, in 4-pixel units.
//   tx_size           transform size.
//   out_dist          out: distortion of the reconstruction.
//   out_sse           out: error of the prediction alone.
//
// When cpi->sf.use_transform_domain_distortion is set, both outputs come from
// the coefficient-domain block error. Otherwise they are measured on pixels,
// reconstructing into a scratch buffer when the block has coefficients.
static void dist_block(const VP10_COMP *cpi, MACROBLOCK *x, int plane,
                       int block, int blk_row, int blk_col, TX_SIZE tx_size,
                       int64_t *out_dist, int64_t *out_sse) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];

  if (cpi->sf.use_transform_domain_distortion) {
    // Transform domain distortion avoids the inverse transform, at the cost
    // of some accuracy.
    const int ss_txfrm_size = num_4x4_blocks_txsize_log2_lookup[tx_size];
    const int num_coeffs = 16 << ss_txfrm_size;
    int64_t coeff_sse;
    int tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
    int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2;
    tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    *out_dist =
        vp10_highbd_block_error(coeff, dqcoeff, num_coeffs, &coeff_sse, bd) >>
        shift;
#else
    *out_dist =
        vp10_block_error(coeff, dqcoeff, num_coeffs, &coeff_sse) >> shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_sse = coeff_sse >> shift;
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
    const int bsw = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
    const int bsh = 4 * num_4x4_blocks_high_lookup[tx_bsize];
    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
    const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
    const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    const uint16_t eob = p->eobs[block];
    unsigned int pix_sse;

    assert(cpi != NULL);

    // Error of the prediction alone.
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &pix_sse);
    *out_sse = (int64_t)pix_sse * 16;

    // With coefficients present, rebuild the block (prediction + residual)
    // in a scratch buffer and measure against that instead.
    if (eob) {
      const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
#if CONFIG_VP9_HIGHBITDEPTH
      DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
      uint8_t *recon = (uint8_t *)recon16;
#else
      DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV;
      INV_TXFM_PARAM inv_txfm_param;

      inv_txfm_param.tx_type = get_tx_type(plane_type, xd, block, tx_size);
      inv_txfm_param.tx_size = tx_size;
      inv_txfm_param.eob = eob;
      inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];

#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        recon = CONVERT_TO_BYTEPTR(recon);
        inv_txfm_param.bd = xd->bd;
        vpx_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
                                 NULL, 0, bsw, bsh, xd->bd);
        highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
      } else
#endif  // CONFIG_VP9_HIGHBITDEPTH
      {
        vpx_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL, 0,
                          bsw, bsh);
        inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
      }

      cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &pix_sse);
    }

    // Without coefficients this equals the prediction error measured above.
    *out_dist = (int64_t)pix_sse * 16;
  }
}

1078
static int rate_block(int plane, int block, int blk_row, int blk_col,
1079
                      TX_SIZE tx_size, struct rdcost_block_args *args) {
1080 1081 1082
#if CONFIG_VAR_TX
  int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
                                           *(args->t_left + blk_row));
1083 1084 1085
  int coeff_cost =
      cost_coeffs(args->x, plane, block, coeff_ctx, tx_size, args->so->scan,
                  args->so->neighbors, args->use_fast_coef_costing);
1086 1087
  const struct macroblock_plane *p = &args->x->plane[plane];
  *(args->t_above + blk_col) = !(p->eobs[block] == 0);
1088
  *(args->t_left + blk_row) = !(p->eobs[block] == 0);
1089 1090
  return coeff_cost;
#else
1091 1092 1093
  return cost_coeffs(args->x, plane, block, args->t_above + blk_col,
                     args->t_left + blk_row, tx_size, args->so->scan,
                     args->so->neighbors, args->use_fast_coef_costing);
1094
#endif  // CONFIG_VAR_TX
Jingning Han's avatar
Jingning Han committed
1095 1096
}

1097 1098 1099 1100 1101 1102 1103
// Returns the sum of squares of a residual block of transform size `tx_size`.
//
//   diff         top-left sample of the residual block.
//   diff_stride  distance, in samples, between consecutive rows.
//   tx_size      transform size; values outside the rectangular cases below
//                must be < TX_SIZES.
//
// With CONFIG_EXT_TX the 2:1 rectangular sizes are handled as two square
// halves, the second one located `second_offset` samples after the first.
static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
                               TX_SIZE tx_size) {
  int square_size;        // side length passed to vpx_sum_squares_2d_i16()
  int is_rect = 0;        // non-zero when a second square must be added
  int second_offset = 0;  // offset of that second square from `diff`
  uint64_t sse;

  switch (tx_size) {
#if CONFIG_EXT_TX
    case TX_4X8:
      square_size = 4;
      is_rect = 1;
      second_offset = 4 * diff_stride;
      break;
    case TX_8X4:
      square_size = 4;
      is_rect = 1;
      second_offset = 4;
      break;
    case TX_8X16:
      square_size = 8;
      is_rect = 1;
      second_offset = 8 * diff_stride;
      break;
    case TX_16X8:
      square_size = 8;
      is_rect = 1;
      second_offset = 8;
      break;
    case TX_16X32:
      square_size = 16;
      is_rect = 1;
      second_offset = 16 * diff_stride;
      break;
    case TX_32X16:
      square_size = 16;
      is_rect = 1;
      second_offset = 16;
      break;
#endif  // CONFIG_EXT_TX
    default:
      assert(tx_size < TX_SIZES);
      square_size = num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
      break;
  }

  sse = vpx_sum_squares_2d_i16(diff, diff_stride, square_size);
  if (is_rect) {
    sse += vpx_sum_squares_2d_i16(diff + second_offset, diff_stride,
                                  square_size);
  }
  return sse;
}

1136
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
1137
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
Jingning Han's avatar
Jingning Han committed
1138 1139 1140 1141 1142 1143 1144 1145
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;
1146 1147
  int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
                                           *(args->t_left + blk_row));
Jingning Han's avatar
Jingning Han committed
1148

1149
  if (args->exit_early) return;
Jingning Han's avatar
Jingning Han committed
1150 1151

  if (!is_inter_block(mbmi)) {
1152
    struct encode_b_args intra_arg = {
1153 1154 1155 1156
      x, NULL, &mbmi->skip, args->t_above, args->t_left, 1
    };
    vp10_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize,
                            tx_size, &intra_arg);