rdopt.c 411 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4
5
6
7
8
9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10
11
12
13
14
 */

#include <assert.h>
#include <math.h>

Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
16
#include "./av1_rtcd.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_dsp/blend.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_mem/aom_mem.h"
21
22
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
Jingning Han's avatar
Jingning Han committed
23

24
25
26
27
28
29
30
31
32
33
34
35
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
Yue Chen's avatar
Yue Chen committed
36
37
38
#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
#endif  // CONFIG_WARPED_MOTION
Jingning Han's avatar
Jingning Han committed
39

Jingning Han's avatar
Jingning Han committed
40
#include "av1/encoder/aq_variance.h"
41
42
43
44
45
46
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
47
#if CONFIG_PALETTE
48
#include "av1/encoder/palette.h"
49
#endif  // CONFIG_PALETTE
50
51
52
53
#include "av1/encoder/quantize.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
54
#include "av1/encoder/tokenize.h"
55
56
57
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
#endif
58
#if CONFIG_DUAL_FILTER
Angie Chiang's avatar
Angie Chiang committed
59
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
60
#if CONFIG_EXT_INTERP
Angie Chiang's avatar
Angie Chiang committed
61
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
62
63
64
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
65
};
Angie Chiang's avatar
Angie Chiang committed
66
67
#else   // CONFIG_EXT_INTERP
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
68
69
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
70
};
Angie Chiang's avatar
Angie Chiang committed
71
72
#endif  // CONFIG_EXT_INTERP
#endif  // CONFIG_DUAL_FILTER
73

74
75
#if CONFIG_EXT_REFS

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))

#else

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
Jingning Han's avatar
Jingning Han committed
103

104
105
#endif  // CONFIG_EXT_REFS

106
#if CONFIG_EXT_REFS
107
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
108
#else
109
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
110
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
111

112
113
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
Jingning Han's avatar
Jingning Han committed
114

115
116
117
118
119
120
#if CONFIG_EXT_INTRA
#define ANGLE_FAST_SEARCH 1
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

121
122
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert
                                  -7.7051, -3.2234, -3.6193, 3.4533 };  // horz
123

Jingning Han's avatar
Jingning Han committed
124
125
126
127
128
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

129
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
Jingning Han's avatar
Jingning Han committed
130
131

struct rdcost_block_args {
Yaowu Xu's avatar
Yaowu Xu committed
132
  const AV1_COMP *cpi;
Jingning Han's avatar
Jingning Han committed
133
  MACROBLOCK *x;
134
135
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
136
  RD_STATS rd_stats;
Jingning Han's avatar
Jingning Han committed
137
138
139
140
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
141
  const SCAN_ORDER *scan_order;
Jingning Han's avatar
Jingning Han committed
142
143
144
};

#define LAST_NEW_MV_INDEX 6
Yaowu Xu's avatar
Yaowu Xu committed
145
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
146
  { NEARESTMV, { LAST_FRAME, NONE } },
147
#if CONFIG_EXT_REFS
148
149
150
  { NEARESTMV, { LAST2_FRAME, NONE } },
  { NEARESTMV, { LAST3_FRAME, NONE } },
  { NEARESTMV, { BWDREF_FRAME, NONE } },
151
#endif  // CONFIG_EXT_REFS
152
153
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
154

155
  { DC_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
156

157
  { NEWMV, { LAST_FRAME, NONE } },
158
#if CONFIG_EXT_REFS
159
160
161
  { NEWMV, { LAST2_FRAME, NONE } },
  { NEWMV, { LAST3_FRAME, NONE } },
  { NEWMV, { BWDREF_FRAME, NONE } },
162
#endif  // CONFIG_EXT_REFS
163
164
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
165

166
  { NEARMV, { LAST_FRAME, NONE } },
167
#if CONFIG_EXT_REFS
168
169
170
  { NEARMV, { LAST2_FRAME, NONE } },
  { NEARMV, { LAST3_FRAME, NONE } },
  { NEARMV, { BWDREF_FRAME, NONE } },
171
#endif  // CONFIG_EXT_REFS
172
173
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
174

Yue Chen's avatar
Yue Chen committed
175
#if CONFIG_EXT_INTER
176
  { NEWFROMNEARMV, { LAST_FRAME, NONE } },
177
#if CONFIG_EXT_REFS
178
179
180
  { NEWFROMNEARMV, { LAST2_FRAME, NONE } },
  { NEWFROMNEARMV, { LAST3_FRAME, NONE } },
  { NEWFROMNEARMV, { BWDREF_FRAME, NONE } },
181
#endif  // CONFIG_EXT_REFS
182
183
  { NEWFROMNEARMV, { ALTREF_FRAME, NONE } },
  { NEWFROMNEARMV, { GOLDEN_FRAME, NONE } },
Yue Chen's avatar
Yue Chen committed
184
185
#endif  // CONFIG_EXT_INTER

186
  { ZEROMV, { LAST_FRAME, NONE } },
187
#if CONFIG_EXT_REFS
188
189
190
  { ZEROMV, { LAST2_FRAME, NONE } },
  { ZEROMV, { LAST3_FRAME, NONE } },
  { ZEROMV, { BWDREF_FRAME, NONE } },
191
#endif  // CONFIG_EXT_REFS
192
193
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
194

195
// TODO(zoeliu): May need to reconsider the order on the modes to check
196

197
#if CONFIG_EXT_INTER
198
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
199
#if CONFIG_EXT_REFS
200
201
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
202
#endif  // CONFIG_EXT_REFS
203
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
204
#if CONFIG_EXT_REFS
205
206
207
208
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
209
#endif  // CONFIG_EXT_REFS
210

211
#else  // CONFIG_EXT_INTER
212

213
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
214
#if CONFIG_EXT_REFS
215
216
  { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
217
#endif  // CONFIG_EXT_REFS
218
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
219
#if CONFIG_EXT_REFS
220
221
222
223
  { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
224
#endif  // CONFIG_EXT_REFS
225
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
226

227
  { TM_PRED, { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
228

229
230
231
232
#if CONFIG_ALT_INTRA
  { SMOOTH_PRED, { INTRA_FRAME, NONE } },
#endif  // CONFIG_ALT_INTRA

233
#if CONFIG_EXT_INTER
234
235
236
237
238
239
240
241
242
  { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
243

244
#if CONFIG_EXT_REFS
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
  { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
264
#endif  // CONFIG_EXT_REFS
265

266
267
268
269
270
271
272
273
274
  { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
275
276

#if CONFIG_EXT_REFS
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
  { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
316
#endif  // CONFIG_EXT_REFS
317
318
319

#else  // CONFIG_EXT_INTER

320
321
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
322
#if CONFIG_EXT_REFS
323
324
325
326
  { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
327
#endif  // CONFIG_EXT_REFS
328
329
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
330
331

#if CONFIG_EXT_REFS
332
333
334
335
336
337
338
339
  { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
340
#endif  // CONFIG_EXT_REFS
Jingning Han's avatar
Jingning Han committed
341

342
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
343
#if CONFIG_EXT_REFS
344
345
  { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
346
#endif  // CONFIG_EXT_REFS
347
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
348
349

#if CONFIG_EXT_REFS
350
351
352
353
  { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
354
#endif  // CONFIG_EXT_REFS
355

356
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
357

358
359
360
361
362
363
364
365
  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
366
367

#if CONFIG_EXT_INTER
368
369
370
371
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },
372
373

#if CONFIG_EXT_REFS
374
375
376
377
378
379
380
381
382
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
383
384
#endif  // CONFIG_EXT_REFS

385
386
387
388
  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
389

390
#if CONFIG_EXT_REFS
391
392
393
394
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
395
396
#endif  // CONFIG_EXT_REFS

397
398
399
400
  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
401
#endif  // CONFIG_EXT_INTER
Jingning Han's avatar
Jingning Han committed
402
403
};

Yaowu Xu's avatar
Yaowu Xu committed
404
static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
405
  { { LAST_FRAME, NONE } },
406
#if CONFIG_EXT_REFS
407
408
  { { LAST2_FRAME, NONE } },          { { LAST3_FRAME, NONE } },
  { { BWDREF_FRAME, NONE } },
409
#endif  // CONFIG_EXT_REFS
410
  { { GOLDEN_FRAME, NONE } },         { { ALTREF_FRAME, NONE } },
411

412
  { { LAST_FRAME, ALTREF_FRAME } },
413
#if CONFIG_EXT_REFS
414
  { { LAST2_FRAME, ALTREF_FRAME } },  { { LAST3_FRAME, ALTREF_FRAME } },
415
#endif  // CONFIG_EXT_REFS
416
  { { GOLDEN_FRAME, ALTREF_FRAME } },
417
418

#if CONFIG_EXT_REFS
419
420
  { { LAST_FRAME, BWDREF_FRAME } },   { { LAST2_FRAME, BWDREF_FRAME } },
  { { LAST3_FRAME, BWDREF_FRAME } },  { { GOLDEN_FRAME, BWDREF_FRAME } },
421
#endif  // CONFIG_EXT_REFS
422

423
  { { INTRA_FRAME, NONE } },
Jingning Han's avatar
Jingning Han committed
424
425
};

426
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
427
428
static INLINE int write_uniform_cost(int n, int v) {
  int l = get_unsigned_bits(n), m = (1 << l) - n;
429
  if (l == 0) return 0;
hui su's avatar
hui su committed
430
  if (v < m)
Yaowu Xu's avatar
Yaowu Xu committed
431
    return (l - 1) * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
432
  else
Yaowu Xu's avatar
Yaowu Xu committed
433
    return l * av1_cost_bit(128, 0);
hui su's avatar
hui su committed
434
}
435
#endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
hui su's avatar
hui su committed
436

437
438
439
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
440
441
442
443
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
444
  DCT_1D,      ADST_1D, DCT_1D,      ADST_1D,
445
#if CONFIG_EXT_TX
446
447
  FLIPADST_1D, DCT_1D,  FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
  DCT_1D,      IDTX_1D, ADST_1D,     IDTX_1D, FLIPADST_1D, IDTX_1D,
448
449
450
451
#endif  // CONFIG_EXT_TX
};

static const TX_TYPE_1D htx_tab[TX_TYPES] = {
452
  DCT_1D,  DCT_1D,      ADST_1D,     ADST_1D,
453
#if CONFIG_EXT_TX
454
455
  DCT_1D,  FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
  IDTX_1D, DCT_1D,      IDTX_1D,     ADST_1D,     IDTX_1D, FLIPADST_1D,
456
457
#endif  // CONFIG_EXT_TX
};
458

Yaowu Xu's avatar
Yaowu Xu committed
459
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
460
461
462
                                         uint8_t *src, int src_stride,
                                         uint8_t *dst, int dst_stride,
                                         double *hordist, double *verdist) {
463
464
  int bw = block_size_wide[bsize];
  int bh = block_size_high[bsize];
465
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
466
467
  unsigned int var[16];
  double total = 0;
468

469
  const int f_index = bsize - BLOCK_16X16;
470
471
472
473
  if (f_index < 0) {
    int i, j, index;
    int w_shift = bw == 8 ? 1 : 2;
    int h_shift = bh == 8 ? 1 : 2;
Yaowu Xu's avatar
Yaowu Xu committed
474
#if CONFIG_AOM_HIGHBITDEPTH
475
476
477
478
479
480
    if (cpi->common.use_highbitdepth) {
      uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (i = 0; i < bh; ++i)
        for (j = 0; j < bw; ++j) {
          index = (j >> w_shift) + ((i >> h_shift) << 2);
481
482
483
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
484
485
        }
    } else {
Yaowu Xu's avatar
Yaowu Xu committed
486
#endif  // CONFIG_AOM_HIGHBITDEPTH
487
488
489
490
491
492
493

      for (i = 0; i < bh; ++i)
        for (j = 0; j < bw; ++j) {
          index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
Yaowu Xu's avatar
Yaowu Xu committed
494
#if CONFIG_AOM_HIGHBITDEPTH
495
    }
Yaowu Xu's avatar
Yaowu Xu committed
496
#endif  // CONFIG_AOM_HIGHBITDEPTH
497
  } else {
498
499
500
501
502
    var[0] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
    var[1] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[1]);
    var[2] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                     dst_stride, &esq[2]);
503
504
505
506
507
    var[3] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                     dst + 3 * bw / 4, dst_stride, &esq[3]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

508
509
510
511
512
    var[4] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
    var[5] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[5]);
    var[6] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                     dst_stride, &esq[6]);
513
514
515
516
517
    var[7] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                     dst + 3 * bw / 4, dst_stride, &esq[7]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

518
519
520
521
522
    var[8] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
    var[9] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                     dst_stride, &esq[9]);
    var[10] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                      dst_stride, &esq[10]);
523
524
525
526
527
    var[11] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                      dst + 3 * bw / 4, dst_stride, &esq[11]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

528
529
530
531
532
533
    var[12] =
        cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
    var[13] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
                                      dst_stride, &esq[13]);
    var[14] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
                                      dst_stride, &esq[14]);
534
535
536
537
    var[15] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
                                      dst + 3 * bw / 4, dst_stride, &esq[15]);
  }

538
539
540
  total = esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] + esq[6] +
          esq[7] + esq[8] + esq[9] + esq[10] + esq[11] + esq[12] + esq[13] +
          esq[14] + esq[15];
541
542
  if (total > 0) {
    const double e_recip = 1.0 / total;
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
    hordist[0] =
        ((double)esq[0] + (double)esq[4] + (double)esq[8] + (double)esq[12]) *
        e_recip;
    hordist[1] =
        ((double)esq[1] + (double)esq[5] + (double)esq[9] + (double)esq[13]) *
        e_recip;
    hordist[2] =
        ((double)esq[2] + (double)esq[6] + (double)esq[10] + (double)esq[14]) *
        e_recip;
    verdist[0] =
        ((double)esq[0] + (double)esq[1] + (double)esq[2] + (double)esq[3]) *
        e_recip;
    verdist[1] =
        ((double)esq[4] + (double)esq[5] + (double)esq[6] + (double)esq[7]) *
        e_recip;
    verdist[2] =
        ((double)esq[8] + (double)esq[9] + (double)esq[10] + (double)esq[11]) *
        e_recip;
561
562
563
564
565
  } else {
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
  (void)var[0];
  (void)var[1];
  (void)var[2];
  (void)var[3];
  (void)var[4];
  (void)var[5];
  (void)var[6];
  (void)var[7];
  (void)var[8];
  (void)var[9];
  (void)var[10];
  (void)var[11];
  (void)var[12];
  (void)var[13];
  (void)var[14];
  (void)var[15];
582
583
}

Yaowu Xu's avatar
Yaowu Xu committed
584
585
586
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize, uint8_t *src,
                            int src_stride, uint8_t *dst, int dst_stride,
                            double *hdist, double *vdist) {
587
588
  int prune_bitmask = 0;
  double svm_proj_h = 0, svm_proj_v = 0;
589
590
  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);
591

592
  svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
593
               vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
594
  svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
               hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
  if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << FLIPADST_1D;
  else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << ADST_1D;

  if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (FLIPADST_1D + 8);
  else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    prune_bitmask |= 1 << (ADST_1D + 8);

  return prune_bitmask;
}

#if CONFIG_EXT_TX
610
static void get_horver_correlation(int16_t *diff, int stride, int w, int h,
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
                                   double *hcorr, double *vcorr) {
  // Returns hor/ver correlation coefficient
  const int num = (h - 1) * (w - 1);
  double num_r;
  int i, j;
  int64_t xy_sum = 0, xz_sum = 0;
  int64_t x_sum = 0, y_sum = 0, z_sum = 0;
  int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
  double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
  *hcorr = *vcorr = 1;

  assert(num > 0);
  num_r = 1.0 / num;
  for (i = 1; i < h; ++i) {
    for (j = 1; j < w; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      xy_sum += x * y;
      xz_sum += x * z;
      x_sum += x;
      y_sum += y;
      z_sum += z;
      x2_sum += x * x;
      y2_sum += y * y;
      z2_sum += z * z;
    }
  }
639
640
641
  x_var_n = x2_sum - (x_sum * x_sum) * num_r;
  y_var_n = y2_sum - (y_sum * y_sum) * num_r;
  z_var_n = z2_sum - (z_sum * z_sum) * num_r;
642
643
644
645
646
647
648
649
650
651
652
653
  xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
  xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
  if (x_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  }
  if (x_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  }
}

654
655
int dct_vs_idtx(int16_t *diff, int stride, int w, int h, double *hcorr,
                double *vcorr) {
656
657
658
659
  int prune_bitmask = 0;
  get_horver_correlation(diff, stride, w, h, hcorr, vcorr);

  if (*vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
660
    prune_bitmask |= 1 << IDTX_1D;
661
662
663
664
  else if (*vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << DCT_1D;

  if (*hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
665
    prune_bitmask |= 1 << (IDTX_1D + 8);
666
667
668
669
670
671
  else if (*hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
    prune_bitmask |= 1 << (DCT_1D + 8);
  return prune_bitmask;
}

// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xu's avatar
Yaowu Xu committed
672
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
673
                             MACROBLOCK *x, MACROBLOCKD *xd, int adst_flipadst,
674
                             int dct_idtx) {
675
676
677
678
679
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  const int bw = 4 << (b_width_log2_lookup[bs]);
  const int bh = 4 << (b_height_log2_lookup[bs]);
680
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
681
  double hcorr, vcorr;
682
  int prune = 0;
Yaowu Xu's avatar
Yaowu Xu committed
683
  av1_subtract_plane(x, bsize, 0);
684

685
686
687
  if (adst_flipadst)
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride, hdist, vdist);
688
  if (dct_idtx) prune |= dct_vs_idtx(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
689
690
691

  return prune;
}
692
693
#endif  // CONFIG_EXT_TX

694
// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xu's avatar
Yaowu Xu committed
695
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
696
                             MACROBLOCK *x, MACROBLOCKD *xd) {
697
698
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
699
  double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
Yaowu Xu's avatar
Yaowu Xu committed
700
  av1_subtract_plane(x, bsize, 0);
701
702
  return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
                          pd->dst.stride, hdist, vdist);
703
704
}

Yaowu Xu's avatar
Yaowu Xu committed
705
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
706
707
708
709
                          MACROBLOCKD *xd, int tx_set) {
#if CONFIG_EXT_TX
  const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
#else
710
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
711
712
#endif

713
  switch (cpi->sf.tx_type_search.prune_mode) {
714
715
    case NO_PRUNE: return 0; break;
    case PRUNE_ONE:
716
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
717
        return 0;
718
719
      return prune_one_for_sby(cpi, bsize, x, xd);
      break;
720
721
#if CONFIG_EXT_TX
    case PRUNE_TWO:
722
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
723
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
724
725
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
726
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
727
728
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
729
      break;
730
#endif
731
732
733
734
735
  }
  assert(0);
  return 0;
}

736
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
737
738
739
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  return !(((prune >> vtx_tab[tx_type]) & 1) |
740
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
741
742
#else
  // temporary to avoid compiler warnings
743
744
745
746
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
747
748
749
750
  return 1;
#endif
}

Yaowu Xu's avatar
Yaowu Xu committed
751
static void model_rd_from_sse(const AV1_COMP *const cpi,
752
753
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
Geza Lore's avatar
Geza Lore committed
754
755
756
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int dequant_shift =
Yaowu Xu's avatar
Yaowu Xu committed
757
#if CONFIG_AOM_HIGHBITDEPTH
758
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
Yaowu Xu's avatar
Yaowu Xu committed
759
#endif  // CONFIG_AOM_HIGHBITDEPTH
760
                                                    3;
Geza Lore's avatar
Geza Lore committed
761
762
763
764
765
766
767

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
Yaowu Xu's avatar
Yaowu Xu committed
768
      *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xu's avatar
Yaowu Xu committed
769
                    (16 - AV1_PROB_COST_SHIFT));
Geza Lore's avatar
Geza Lore committed
770
771
772
773
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
Yaowu Xu's avatar
Yaowu Xu committed
774
775
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
Geza Lore's avatar
Geza Lore committed
776
777
778
779
780
  }

  *dist <<= 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
781
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
782
783
784
785
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
Jingning Han's avatar
Jingning Han committed
786
787
788
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
Geza Lore's avatar
Geza Lore committed
789
790
791
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

Jingning Han's avatar
Jingning Han committed
792
793
794
795
796
797
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

Geza Lore's avatar
Geza Lore committed
798
799
800
  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han's avatar
Jingning Han committed
801
802
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);

Geza Lore's avatar
Geza Lore committed
803
804
805
    unsigned int sse;
    int rate;
    int64_t dist;
Jingning Han's avatar
Jingning Han committed
806

Geza Lore's avatar
Geza Lore committed
807
808
    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
809
810
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);
Jingning Han's avatar
Jingning Han committed
811

812
    if (plane == 0) x->pred_sse[ref] = sse;
Jingning Han's avatar
Jingning Han committed
813

Geza Lore's avatar
Geza Lore committed
814
    total_sse += sse;
Jingning Han's avatar
Jingning Han committed
815

Geza Lore's avatar
Geza Lore committed
816
    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
Geza Lore's avatar
Geza Lore committed
817
818
819

    rate_sum += rate;
    dist_sum += dist;
Jingning Han's avatar
Jingning Han committed
820
821
  }

Geza Lore's avatar
Geza Lore committed
822
  *skip_txfm_sb = total_sse == 0;
Jingning Han's avatar
Jingning Han committed
823
824
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
Geza Lore's avatar
Geza Lore committed
825
  *out_dist_sum = dist_sum;
Jingning Han's avatar
Jingning Han committed
826
827
}

828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() return two kind of errors,
// 1) reconstruction (i.e. decoded) error and
// 2) Squared sum of transformed residue (i.e. 'coeff')
// However, if PVQ is enabled, coeff does not keep the transformed residue
// but instead a transformed original is kept.
// Hence, new parameter ref vector (i.e. transformed predicted signal)
// is required to derive the residue signal,
// i.e. coeff - ref = residue (all transformed).

// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
int64_t av1_block_error2_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                           const tran_low_t *ref, intptr_t block_size,
                           int64_t *ssz) {
  int64_t error;

  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  error = av1_block_error_fp(coeff, dqcoeff, block_size);
  // prediction residue^2 = (orig - ref)^2
  *ssz = av1_block_error_fp(coeff, ref, block_size);

  return error;
}
#endif

Yaowu Xu's avatar
Yaowu Xu committed
855
856
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
Jingning Han's avatar
Jingning Han committed
857
858
859
860
861
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
862
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
863
864
865
866
867
868
869
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
870
871
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
Jingning Han's avatar
Jingning Han committed
872
873
874
875
876
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
877
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
878
879
880
881
882
  }

  return error;
}

Yaowu Xu's avatar
Yaowu Xu committed
883
884
885
886
#if CONFIG_AOM_HIGHBITDEPTH
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
Jingning Han's avatar
Jingning Han committed
887
888
889
890
891
892
893
  int i;
  int64_t error = 0, sqcoeff = 0;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
894
    error += diff * diff;
Jingning Han's avatar
Jingning Han committed
895
896
897
898
899
900
901
902
903
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
Yaowu Xu's avatar
Yaowu Xu committed
904
#endif  // CONFIG_AOM_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
905

906
#if !CONFIG_PVQ
907
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
Jingning Han's avatar
Jingning Han committed
908
909
910
911
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * were non-zero). */
912
913
914
int av1_cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                    int block, int coeff_ctx, TX_SIZE tx_size,
                    const int16_t *scan, const int16_t *nb,
915
                    int use_fast_coef_costing) {
Jingning Han's avatar
Jingning Han committed
916
917
918
919
920
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
921
  const uint16_t *band_count = &band_count_table[tx_size][1];
Jingning Han's avatar
Jingning Han committed
922
923
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
924
  const int tx_size_ctx = txsize_sqr_map[tx_size];
925
926
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
927
  uint8_t token_cache[MAX_TX_SQUARE];
928
  int pt = coeff_ctx;
Jingning Han's avatar
Jingning Han committed
929
  int c, cost;
Yaowu Xu's avatar
Yaowu Xu committed
930
931
#if CONFIG_AOM_HIGHBITDEPTH
  const int *cat6_high_cost = av1_get_high_cost_table(xd->bd);
Jingning Han's avatar
Jingning Han committed
932
#else
Yaowu Xu's avatar
Yaowu Xu committed
933
  const int *cat6_high_cost = av1_get_high_cost_table(8);
Jingning Han's avatar
Jingning Han committed
934
935
#endif

936
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
Jingning Han's avatar
Jingning Han committed
937
938
939
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);
940
#endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
941
  (void)cm;
Jingning Han's avatar
Jingning Han committed
942
943
944
945
946

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
  } else {
Julia Robson's avatar
Julia Robson committed
947
948
949
950
951
952
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
Yaowu Xu's avatar
Yaowu Xu committed
953
      cost = av1_get_token_cost(v, &prev_t, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
954
955
      cost += (*token_costs)[0][pt][prev_t];

Yaowu Xu's avatar
Yaowu Xu committed
956
      token_cache[0] = av1_pt_energy_class[prev_t];
Julia Robson's avatar
Julia Robson committed
957
958
959
960
961
962
963
964
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
Yaowu Xu's avatar
Yaowu Xu committed
965
        cost += av1_get_token_cost(v, &t, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
966
967
968
969
970
971
        cost += (*token_costs)[!prev_t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
Jingning Han's avatar
Jingning Han committed
972
973
      }

Julia Robson's avatar
Julia Robson committed
974
      // eob token
975
      if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
Julia Robson's avatar
Julia Robson committed
976
977
978
979
980
981
982

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
983
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
Yaowu Xu's avatar
Yaowu Xu committed
984
      cost = av1_get_token_cost(v, &tok, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
985
986
      cost += (*token_costs)[0][pt][tok];

Yaowu Xu's avatar
Yaowu Xu committed
987
      token_cache[0] = av1_pt_energy_class[tok];
Julia Robson's avatar
Julia Robson committed
988
989
990
991
992
993
994
995
996
      ++token_costs;

      tok_cost_ptr = &((*token_costs)[!tok]);

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
Yaowu Xu's avatar
Yaowu Xu committed
997
        cost += av1_get_token_cost(v, &tok, cat6_high_cost);
Julia Robson's avatar
Julia Robson committed
998
999
        pt = get_coef_context(nb, token_cache, c);
        cost += (*tok_cost_ptr)[pt][tok];
Yaowu Xu's avatar
Yaowu Xu committed
1000
        token_cache[rc] = av1_pt_energy_class[tok];
Julia Robson's avatar
Julia Robson committed
1001
1002
1003
1004
1005
1006
1007
1008
1009
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
        tok_cost_ptr = &((*token_costs)[!tok]);
      }

      // eob token
      if (band_left) {
Jingning Han's avatar
Jingning Han committed
1010
1011
1012
1013
1014
1015
1016
1017
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  return cost;
}
1018
#endif
Jingning Han's avatar
Jingning Han committed
1019

Yaowu Xu's avatar
Yaowu Xu committed
1020
1021
static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
                       int blk_row, int blk_col, TX_SIZE tx_size,
Jingning Han's avatar
Jingning Han committed
1022
                       int64_t *out_dist, int64_t *out_sse) {
1023
  MACROBLOCKD *const xd = &x->e_mbd;
Alex Converse's avatar
Alex Converse committed
1024
1025
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
1026
1027
1028
  if (cpi->sf.use_transform_domain_distortion) {
    // Transform domain distortion computation is more accurate as it does
    // not involve an inverse transform, but it is less accurate.
Jingning Han's avatar
Jingning Han committed
1029
    const int buffer_length = tx_size_2d[tx_size];
1030
    int64_t this_sse;
1031
    int shift = (MAX_TX_SCALE - get_tx_scale(tx_size)) * 2;
1032
1033
    tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1034
#if CONFIG_PVQ
1035
    tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
1036
#endif
Yaowu Xu's avatar
Yaowu Xu committed
1037
#if CONFIG_AOM_HIGHBITDEPTH
1038
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1039
    *out_dist =
Jingning Han's avatar
Jingning Han committed
1040
        av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
1041
        shift;
1042
1043
1044
1045
#elif CONFIG_PVQ
    *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
                                   &this_sse) >>
                shift;
Jingning Han's avatar
Jingning Han committed
1046
1047
1048
#else
    *out_dist =
        av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
Yaowu Xu's avatar
Yaowu Xu committed
1049
#endif  // CONFIG_AOM_HIGHBITDEPTH
1050
1051
1052
    *out_sse = this_sse >> shift;
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
Jingning Han's avatar
Jingning Han committed
1053
1054
    const int bsw = block_size_wide[tx_bsize];
    const int bsh = block_size_high[tx_bsize];
1055
1056
    const int src_stride = x->plane[plane].src.stride;
    const int dst_stride = xd->plane[plane].dst.stride;
Jingning Han's avatar
Jingning Han committed
1057
1058
1059
1060
1061
    // Scale the transform block index to pixel unit.
    const int src_idx = (blk_row * src_stride + blk_col)
                        << tx_size_wide_log2[0];
    const int dst_idx = (blk_row * dst_stride + blk_col)
                        << tx_size_wide_log2[0];
1062
1063
1064
    const uint8_t *src = &x->plane[plane].src.buf[src_idx];
    const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Alex Converse's avatar
Alex Converse committed
1065
    const uint16_t eob = p->eobs[block];
1066
1067
1068
1069

    unsigned int tmp;

    assert(cpi != NULL);
Jingning Han's avatar
Jingning Han committed
1070
    assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
1071
1072
1073
1074

    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
    *out_sse = (int64_t)tmp * 16;

Alex Converse's avatar
Alex Converse committed
1075
    if (eob) {
1076
      const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
Yaowu Xu's avatar
Yaowu Xu committed
1077
#if CONFIG_AOM_HIGHBITDEPTH
1078
      DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
1079
      uint8_t *recon = (uint8_t *)recon16;
Jingning Han's avatar
Jingning Han committed
1080
#else
1081
      DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
Yaowu Xu's avatar
Yaowu Xu committed
1082
#endif  // CONFIG_AOM_HIGHBITDEPTH
1083
1084
1085
1086
1087
1088
1089

      const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV;

      INV_TXFM_PARAM inv_txfm_param;

      inv_txfm_param.tx_type = get_tx_type(plane_type, xd, block, tx_size);
      inv_txfm_param.tx_size = tx_size;
Alex Converse's avatar
Alex Converse committed
1090
      inv_txfm_param.eob = eob;
1091
1092
      inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];

Yaowu Xu's avatar
Yaowu Xu committed
1093
#if CONFIG_AOM_HIGHBITDEPTH
1094
1095
1096
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        recon = CONVERT_TO_BYTEPTR(recon);
        inv_txfm_param.bd = xd->bd;
Yaowu Xu's avatar
Yaowu Xu committed
1097
        aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
1098
                                 NULL, 0, bsw, bsh, xd->bd);
1099
        highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
1100
      } else
Yaowu Xu's avatar
Yaowu Xu committed
1101
#endif  // CONFIG_AOM_HIGHBITDEPTH
1102
      {
1103
#if !CONFIG_PVQ
Yaowu Xu's avatar
Yaowu Xu committed
1104
        aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL, 0,
1105
                          bsw, bsh);
1106
1107
1108
1109
1110
1111
#else
        int i, j;

        for (j = 0; j < bsh; j++)
          for (i = 0; i < bsw; i++) recon[j * MAX_TX_SIZE + i] = 0;
#endif
1112
        inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
1113
1114
      }

1115
      cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &tmp);
1116
1117
1118
1119
    }

    *out_dist = (int64_t)tmp * 16;
  }
Jingning Han's avatar
Jingning Han committed
1120
1121
}

1122
#if !CONFIG_PVQ
Debargha Mukherjee's avatar
Debargha Mukherjee committed
1123
static int rate_block(int plane, int block, int coeff_ctx, TX_SIZE tx_size,
1124
                      struct rdcost_block_args *args) {
1125
1126
1127
  return av1_cost_coeffs(&args->cpi->common, args->x, plane, block, coeff_ctx,
                         tx_size, args->scan_order->scan,
                         args->scan_order->neighbors,
1128
                         args->use_fast_coef_costing);
Jingning Han's avatar
Jingning Han committed
1129
}
1130
#endif
Jingning Han's avatar
Jingning Han committed
1131

1132
1133
1134
1135
1136
static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
                               TX_SIZE tx_size) {
  uint64_t sse;
  switch (tx_size) {
    case TX_4X8:
Yaowu Xu's avatar
Yaowu Xu committed
1137
1138
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 4) +
            aom_sum_squares_2d_i16(diff + 4 * diff_stride, diff_stride, 4);
1139
1140
      break;
    case TX_8X4:
Yaowu Xu's avatar
Yaowu Xu committed
1141
1142
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 4) +
            aom_sum_squares_2d_i16(diff + 4, diff_stride, 4);
1143
      break;
1144
    case TX_8X16:
Yaowu Xu's avatar
Yaowu Xu committed
1145
1146
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 8) +
            aom_sum_squares_2d_i16(diff + 8 * diff_stride, diff_stride, 8);
1147
1148
      break;
    case TX_16X8:
Yaowu Xu's avatar
Yaowu Xu committed
1149
1150
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 8) +
            aom_sum_squares_2d_i16(diff + 8, diff_stride, 8);
1151
1152
      break;
    case TX_16X32:
Yaowu Xu's avatar
Yaowu Xu committed
1153
1154
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 16) +
            aom_sum_squares_2d_i16(diff + 16 * diff_stride, diff_stride, 16);
1155
1156
      break;
    case TX_32X16:
Yaowu Xu's avatar
Yaowu Xu committed
1157
1158
      sse = aom_sum_squares_2d_i16(diff, diff_stride, 16) +
            aom_sum_squares_2d_i16(diff + 16, diff_stride, 16);
1159
      break;
1160
1161
    default:
      assert(tx_size < TX_SIZES);
1162
      sse = aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size]);
1163
1164
1165
1166
1167
      break;
  }
  return sse;
}

1168
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
1169
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
Jingning Han's avatar
Jingning Han committed
1170
1171
1172
1173
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Angie Chiang's avatar
Angie Chiang committed
1174
  const AV1_COMMON *cm = &args->cpi->common;
Jingning Han's avatar
Jingning Han committed
1175
  int64_t rd1, rd2, rd;
1176
1177
  int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
                                           *(args->t_left + blk_row));