reconinter.c 134 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4
5
6
7
8
9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10
11
12
13
 */

#include <assert.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
15
16
#include "./aom_scale_rtcd.h"
#include "./aom_dsp_rtcd.h"
#include "./aom_config.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom/aom_integer.h"
19
#include "aom_dsp/blend.h"
Jingning Han's avatar
Jingning Han committed
20

21
22
23
#include "av1/common/blockd.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
Yue Chen's avatar
Yue Chen committed
24
#if CONFIG_MOTION_VAR
25
#include "av1/common/onyxc_int.h"
Yue Chen's avatar
Yue Chen committed
26
#endif  // CONFIG_MOTION_VAR
Jingning Han's avatar
Jingning Han committed
27

28
#if CONFIG_EXT_INTER
29

clang-format's avatar
clang-format committed
30
#define NSMOOTHERS 1
31

32
// [smoother][negative][direction]
clang-format's avatar
clang-format committed
33
34
35
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
                              [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
36

clang-format's avatar
clang-format committed
37
DECLARE_ALIGNED(16, static uint8_t,
38
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);
39

40
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
41
// on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE.
clang-format's avatar
clang-format committed
42
DECLARE_ALIGNED(16, static uint8_t,
43
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
44

45
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
46

47
// Some unused wedge codebooks left temporarily to facilitate experiments.
48
49
// To be removed when settled.
/*
50
static wedge_code_type wedge_codebook_8_hgtw[8] = {
clang-format's avatar
clang-format committed
51
52
53
54
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
55
56
};

57
static wedge_code_type wedge_codebook_8_hltw[8] = {
clang-format's avatar
clang-format committed
58
59
60
61
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
62
63
};

64
static wedge_code_type wedge_codebook_8_heqw[8] = {
clang-format's avatar
clang-format committed
65
66
67
68
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
69
};
70
71

static const wedge_code_type wedge_codebook_32_hgtw[32] = {
clang-format's avatar
clang-format committed
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
88
89
};

90
static const wedge_code_type wedge_codebook_32_hltw[32] = {
clang-format's avatar
clang-format committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
107
108
};

109
static const wedge_code_type wedge_codebook_32_heqw[32] = {
clang-format's avatar
clang-format committed
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
126
};
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
*/

// 16-entry wedge codebook for blocks taller than wide (hgtw).
// Each entry is { direction, x_offset/8ths, y_offset/8ths } of the wedge
// boundary line; offsets are scaled by block size in get_wedge_mask_inplace().
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// 16-entry wedge codebook for blocks wider than tall (hltw).
// Same layout as wedge_codebook_16_hgtw but favors vertical splits.
static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// 16-entry wedge codebook for square blocks (height == width).
static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
161

162
// Per-block-size wedge parameters.
// Entry fields (as accessed elsewhere in this file):
//   { bits, codebook, signflip, smoother, masks }
// bits == 0 disables wedge prediction for that block size.
// Only 8x8 .. 32x32 (and the tall/wide AB shapes at the end) get a
// 16-entry (bits == 4) codebook when CONFIG_WEDGE is on.
const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
#endif  // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
#if CONFIG_WEDGE
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
    wedge_masks[BLOCK_8X8] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
    wedge_masks[BLOCK_8X16] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
    wedge_masks[BLOCK_16X8] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
    wedge_masks[BLOCK_16X16] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
    wedge_masks[BLOCK_16X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
    wedge_masks[BLOCK_32X16] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
    wedge_masks[BLOCK_32X32] },
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
    wedge_masks[BLOCK_32X64] },
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
    wedge_masks[BLOCK_64X32] },
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
    wedge_masks[BLOCK_64X64] },
#else
  // Wedge disabled at build time: all bits are 0 so no mask is ever selected.
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
    wedge_masks[BLOCK_8X8] },
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
    wedge_masks[BLOCK_8X16] },
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
    wedge_masks[BLOCK_16X8] },
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
    wedge_masks[BLOCK_16X16] },
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
    wedge_masks[BLOCK_16X32] },
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
    wedge_masks[BLOCK_32X16] },
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
    wedge_masks[BLOCK_32X32] },
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
    wedge_masks[BLOCK_32X64] },
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
    wedge_masks[BLOCK_64X32] },
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
    wedge_masks[BLOCK_64X64] },
#endif  // CONFIG_WEDGE
#if CONFIG_EXT_PARTITION
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
#endif  // CONFIG_EXT_PARTITION
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
    wedge_masks[BLOCK_4X16] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
    wedge_masks[BLOCK_16X4] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 0,
    wedge_masks[BLOCK_8X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 0,
    wedge_masks[BLOCK_32X8] },
};
228

clang-format's avatar
clang-format committed
229
// Returns a pointer into the master wedge mask for (wedge_index, neg) of the
// given block size.  The pointer addresses the center region of the master
// mask, offset by the codebook's x/y offsets (expressed in 8ths of the block
// dimensions), so the caller reads it with MASK_MASTER_STRIDE.
// No copy is made ("inplace"); the returned memory is shared and read-only.
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const uint8_t *master;
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *a =
      wedge_params_lookup[sb_type].codebook + wedge_index;
  const int smoother = wedge_params_lookup[sb_type].smoother;
  int woff, hoff;
  // signflip normalizes the sign convention across block sizes (see
  // init_wedge_signs); XOR-ing with |neg| selects master [0] or complement [1].
  const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 &&
         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] +
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
  return master;
}

Yaowu Xu's avatar
Yaowu Xu committed
250
251
252
// Returns a pointer into the shared master wedge mask, additionally shifted
// by (offset_x, offset_y) pixels; the mask is read with MASK_MASTER_STRIDE.
// Returns NULL if the block size has no wedge masks.
const uint8_t *av1_get_soft_mask(int wedge_index, int wedge_sign,
                                 BLOCK_SIZE sb_type, int offset_x,
                                 int offset_y) {
  const uint8_t *mask =
      get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
  // Negative offset: the caller indexes at (offset_x, offset_y) relative to
  // the returned pointer, landing back on the intended region.
  if (mask) mask -= (offset_x + offset_y * MASK_MASTER_STRIDE);
  return mask;
}

259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#if CONFIG_COMPOUND_SEGMENT
// Writes the alpha complement (AOM_BLEND_A64_MAX_ALPHA - v) of every sample
// of |mask| into |mask_inv_buffer| (same |stride| for input and output) and
// returns |mask_inv_buffer| for caller convenience.
static uint8_t *invert_mask(uint8_t *mask_inv_buffer, const uint8_t *const mask,
                            int h, int w, int stride) {
  int r, c;
  for (r = 0; r < h; ++r) {
    const uint8_t *src_row = mask + r * stride;
    uint8_t *dst_row = mask_inv_buffer + r * stride;
    for (c = 0; c < w; ++c) dst_row[c] = AOM_BLEND_A64_MAX_ALPHA - src_row[c];
  }
  return mask_inv_buffer;
}
#endif  // CONFIG_COMPOUND_SEGMENT

// Returns the *inverse* blend mask for a masked compound mode:
// - COMPOUND_WEDGE: the precomputed contiguous mask for the opposite sign.
// - COMPOUND_SEG: the complement of seg_mask, materialized into the
//   caller-provided |mask_buffer|.
// Asserts that the mode is actually a masked compound type.
const uint8_t *av1_get_compound_type_mask_inverse(
    const INTERINTER_COMPOUND_DATA *const comp_data,
#if CONFIG_COMPOUND_SEGMENT
    uint8_t *mask_buffer, int h, int w, int stride,
#endif
    BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->interinter_compound_type));
  (void)sb_type;  // only used when CONFIG_WEDGE is enabled
  switch (comp_data->interinter_compound_type) {
#if CONFIG_WEDGE
    case COMPOUND_WEDGE:
      // Flipping wedge_sign selects the complementary wedge mask.
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          !comp_data->wedge_sign, sb_type);
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG:
      return invert_mask(mask_buffer, comp_data->seg_mask, h, w, stride);
#endif  // CONFIG_COMPOUND_SEGMENT
    default: assert(0); return NULL;
  }
}
294

295
296
// Returns the blend mask for a masked compound mode (wedge or segment).
// For COMPOUND_SEG the mask is comp_data->seg_mask itself (no copy).
// Asserts that the mode is actually a masked compound type.
const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->interinter_compound_type));
  (void)sb_type;  // only used when CONFIG_WEDGE is enabled
  switch (comp_data->interinter_compound_type) {
#if CONFIG_WEDGE
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG: return comp_data->seg_mask;
#endif  // CONFIG_COMPOUND_SEGMENT
    default: assert(0); return NULL;
  }
}

#if CONFIG_COMPOUND_SEGMENT
313
314
315
#if COMPOUND_SEGMENT_TYPE == 0
// Fills an h x w region of |mask| (stride = block width of |sb_type|) with a
// constant weight: |mask_val|, or its alpha complement when |which_inverse|.
static void uniform_mask(uint8_t *mask, int which_inverse, BLOCK_SIZE sb_type,
                         int h, int w, int mask_val) {
  int i, j;
  int block_stride = block_size_wide[sb_type];
  for (i = 0; i < h; ++i)
    for (j = 0; j < w; ++j) {
      mask[i * block_stride + j] =
          which_inverse ? AOM_BLEND_A64_MAX_ALPHA - mask_val : mask_val;
    }
}

// COMPOUND_SEGMENT_TYPE == 0 variant: the segment mask is a uniform weight
// (45 or its complement), independent of the two predictors, so the source
// buffers are ignored.
void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
                             const uint8_t *src0, int src0_stride,
                             const uint8_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w) {
  (void)src0;
  (void)src1;
  (void)src0_stride;
  (void)src1_stride;
  switch (mask_type) {
    case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
    case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
    default: assert(0);
  }
}
339

340
#if CONFIG_HIGHBITDEPTH
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
// High-bitdepth counterpart of build_compound_seg_mask for
// COMPOUND_SEGMENT_TYPE == 0.  The mask is uniform, so bit depth and the
// predictor buffers are irrelevant and ignored.
void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                    const uint8_t *src0, int src0_stride,
                                    const uint8_t *src1, int src1_stride,
                                    BLOCK_SIZE sb_type, int h, int w, int bd) {
  (void)src0;
  (void)src1;
  (void)src0_stride;
  (void)src1_stride;
  (void)bd;
  switch (mask_type) {
    case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
    case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
    default: assert(0);
  }
}
356
#endif  // CONFIG_HIGHBITDEPTH
357
358
359

#elif COMPOUND_SEGMENT_TYPE == 1
#define DIFF_FACTOR 16
360
361
362
363
364
365

#if CONFIG_CONVOLVE_ROUND
// Builds a difference-weighted mask from two 32-bit (pre-rounding) convolve
// outputs.  Each weight is mask_base + |src0 - src1| / DIFF_FACTOR, clamped
// to [0, AOM_BLEND_A64_MAX_ALPHA], optionally alpha-inverted.
// |round| undoes the intermediate convolve precision so the difference is
// compared at (roughly) 8-bit scale.
static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
                             const int32_t *src0, int src0_stride,
                             const int32_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w,
                             ConvolveParams *conv_params, int bd) {
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  int i, j, m, diff;
  int block_stride = block_size_wide[sb_type];
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * block_stride + j] =
          which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

// Dispatches DIFFWTD_38 / DIFFWTD_38_INV mask construction for 32-bit
// (CONFIG_CONVOLVE_ROUND) predictor buffers.  mask_base = 38 matches the
// enum name.
static void build_compound_seg_mask_d32(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                        const int32_t *src0, int src0_stride,
                                        const int32_t *src1, int src1_stride,
                                        BLOCK_SIZE sb_type, int h, int w,
                                        ConvolveParams *conv_params, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride,
                       sb_type, h, w, conv_params, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d32(mask, 1, 38, src0, src0_stride, src1, src1_stride,
                       sb_type, h, w, conv_params, bd);
      break;
    default: assert(0);
  }
}
#endif

401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
// Builds a difference-weighted mask from two 8-bit predictors: each weight is
// mask_base + |src0 - src1| / DIFF_FACTOR, clamped to
// [0, AOM_BLEND_A64_MAX_ALPHA], optionally alpha-inverted.  The output mask
// uses the block width of |sb_type| as its stride.
static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
                         const uint8_t *src0, int src0_stride,
                         const uint8_t *src1, int src1_stride,
                         BLOCK_SIZE sb_type, int h, int w) {
  const int mask_stride = block_size_wide[sb_type];
  int r, c;
  for (r = 0; r < h; ++r) {
    const uint8_t *row0 = src0 + r * src0_stride;
    const uint8_t *row1 = src1 + r * src1_stride;
    uint8_t *out = mask + r * mask_stride;
    for (c = 0; c < w; ++c) {
      const int diff = abs((int)row0[c] - (int)row1[c]);
      const int m =
          clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      out[c] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

// Dispatches DIFFWTD_38 / DIFFWTD_38_INV mask construction for 8-bit
// predictors.  mask_base = 38 matches the enum name.
void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
                             const uint8_t *src0, int src0_stride,
                             const uint8_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, sb_type,
                   h, w);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, sb_type,
                   h, w);
      break;
    default: assert(0);
  }
}

435
#if CONFIG_HIGHBITDEPTH
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
// High-bitdepth difference-weighted mask: like diffwtd_mask, but the sample
// difference is scaled down by (bd - 8) so the DIFF_FACTOR threshold behaves
// the same at every bit depth.  The mask itself is always 8-bit.
static void diffwtd_mask_highbd(uint8_t *mask, int which_inverse, int mask_base,
                                const uint16_t *src0, int src0_stride,
                                const uint16_t *src1, int src1_stride,
                                BLOCK_SIZE sb_type, int h, int w, int bd) {
  int i, j, m, diff;
  int block_stride = block_size_wide[sb_type];
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs((int)src0[i * src0_stride + j] -
                 (int)src1[i * src1_stride + j]) >>
             (bd - 8);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * block_stride + j] =
          which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

// High-bitdepth dispatch for DIFFWTD_38 / DIFFWTD_38_INV segment masks.
//
// Fix: the mask_base passed here was 42, while the 8-bit path
// (build_compound_seg_mask) and the d32 path both use 38 — matching the
// DIFFWTD_38 enum name.  Using 42 made high-bitdepth streams produce a
// different compound mask than 8-bit streams for the same mode; 38 restores
// consistency across all three code paths.  (42 appears to be a leftover
// from an earlier DIFFWTD_42 codebase — verify against upstream history.)
void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                    const uint8_t *src0, int src0_stride,
                                    const uint8_t *src1, int src1_stride,
                                    BLOCK_SIZE sb_type, int h, int w, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
                          bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
                          bd);
      break;
    default: assert(0);
  }
}
472
#endif  // CONFIG_HIGHBITDEPTH
473
#endif  // COMPOUND_SEGMENT_TYPE
474
#endif  // CONFIG_COMPOUND_SEGMENT
475

476
477
478
479
#if MASK_MASTER_SIZE == 64
// Precomputed 1-D profiles of the master wedge masks (values 0..64, i.e.
// 0..(1 << WEDGE_WEIGHT_BITS)).  The "oblique" pair gives odd/even row
// profiles of the 63-degree edge; rows of the 2-D master are generated by
// shifting these (see init_wedge_master_masks).  [NSMOOTHERS] outer index
// selects the smoothing profile (currently only one).
static const uint8_t wedge_master_oblique_odd[NSMOOTHERS][MASK_MASTER_SIZE] = {
  {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
      37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  }
};
static const uint8_t wedge_master_oblique_even[NSMOOTHERS][MASK_MASTER_SIZE] = {
  {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
      46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  }
};
static const uint8_t wedge_master_vertical[NSMOOTHERS][MASK_MASTER_SIZE] = { {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
    43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
} };

// Copies |width| bytes from |src| to |dst|, shifted right by |shift| bytes
// (left when |shift| is negative).  The bytes vacated by the shift are filled
// by replicating the nearest edge sample of |src|.
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
  if (shift < 0) {
    // Left shift: drop the first |-shift| source bytes, pad with last byte.
    const int n = -shift;
    memcpy(dst, src + n, width - n);
    memset(dst + (width - n), src[width - 1], n);
  } else {
    // Right shift: pad the front with the first source byte.
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  }
}
#else
// Non-64 master size: masks are generated analytically with a tanh ramp;
// this is the tanh smoothing divisor per smoother profile.
static const double smoother_param[NSMOOTHERS] = { 3.0 };
#endif  // MASK_MASTER_SIZE == 64

514
// Fills wedge_mask_obl[smoother][sign][direction] for all six directions.
// Only WEDGE_OBLIQUE63 and WEDGE_VERTICAL are generated directly (from the
// shifted 1-D profiles when MASK_MASTER_SIZE == 64, otherwise from a tanh
// ramp); the other four directions and all sign-[1] complements are derived
// by transposition / reflection in the second pass.
static void init_wedge_master_masks() {
  int i, j, s;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  for (s = 0; s < NSMOOTHERS; s++) {
// Note: index [0] stores the masters, and [1] its complement.
#if MASK_MASTER_SIZE == 64
    // Generate prototype by shifting the masters.  Each successive row pair
    // of the oblique master shifts the 1-D profile one byte to the left,
    // producing the ~63-degree edge.
    int shift = h / 4;
    for (i = 0; i < h; i += 2) {
      shift_copy(wedge_master_oblique_even[s],
                 &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride], shift,
                 MASK_MASTER_SIZE);
      shift--;
      shift_copy(wedge_master_oblique_odd[s],
                 &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][(i + 1) * stride],
                 shift, MASK_MASTER_SIZE);
      // Every row of the vertical master is the same 1-D profile.
      memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride],
             wedge_master_vertical[s],
             MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
      memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][(i + 1) * stride],
             wedge_master_vertical[s],
             MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
    }
#else
    // Analytic fallback: signed distance from the line a[0]*x + a[1]*y = 0,
    // mapped through tanh to a 0..64 weight.
    const int a[2] = { 2, 1 };
    const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; ++j) {
        int x = (2 * j + 1 - w);
        int y = (2 * i + 1 - h);
        double d = (a[0] * x + a[1] * y) / asqrt;
        const int msk = (int)rint((1.0 + tanh(d / smoother_param[s])) * 32);
        wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = msk;
        const int mskx = (int)rint((1.0 + tanh(x / smoother_param[s])) * 32);
        wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = mskx;
      }
    }
#endif  // MASK_MASTER_SIZE == 64
    // Derive the remaining directions and the [1] complements by symmetry:
    // OBLIQUE27 is the transpose of OBLIQUE63; OBLIQUE117/153 are its
    // reflections with inverted weights; HORIZONTAL is the transpose of
    // VERTICAL.
    for (i = 0; i < h; ++i) {
      for (j = 0; j < w; ++j) {
        const int msk = wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j];
        wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = msk;
        wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
            wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - msk;
        wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
            wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - msk;
        wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
            wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
                msk;
        const int mskx = wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j];
        wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
        wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
            wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - mskx;
      }
    }
  }
}

// If the signs for the wedges for various blocksizes are
// inconsistent flip the sign flag. Do it only once for every
// wedge codebook.
// If the signs for the wedges for various blocksizes are
// inconsistent flip the sign flag. Do it only once for every
// wedge codebook.
static void init_wedge_signs() {
  BLOCK_SIZE sb_type;
  memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
    const int bw = block_size_wide[sb_type];
    const int bh = block_size_high[sb_type];
    // Note: wedge_params is a struct copy, but signflip is a pointer member,
    // so the write below lands in wedge_signflip_lookup.
    const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
    const int wbits = wedge_params.bits;
    const int wtypes = 1 << wbits;
    int i, w;
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      // Get the mask master, i.e. index [0]
      const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
      // Average the mask along its top row and left column only — a cheap
      // proxy for the whole-block average.
      int avg = 0;
      for (i = 0; i < bw; ++i) avg += mask[i];
      for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
      avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
      // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
      // If default sign is 1:
      //   If sign requested is 0, we need to flip the sign and return
      //   the complement i.e. index [1] instead. If sign requested is 1
      //   we need to flip the sign and return index [0] instead.
      // If default sign is 0:
      //   If sign requested is 0, we need to return index [0] the master
      //   if sign requested is 1, we need to return the complement index [1]
      //   instead.
      wedge_params.signflip[w] = (avg < 32);
    }
  }
}

// Materializes every wedge mask (both signs) as a contiguous bw x bh copy in
// wedge_mask_buf and records the pointers in wedge_params_lookup[...].masks,
// so prediction can read masks without the MASK_MASTER_STRIDE indirection.
static void init_wedge_masks() {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
    const int wbits = wedge_params->bits;
    const int wtypes = 1 << wbits;
    int w;
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      // Sign 0: copy the master-region view into a contiguous buffer.
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      // Sign 1: the complement mask.
      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    // 4 * MAX_WEDGE_SQUARE bound (see wedge_mask_buf) must not be exceeded.
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}

// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
Yaowu Xu's avatar
Yaowu Xu committed
644
// One-time initialization of all wedge mask tables, in dependency order:
// masters first, then per-block sign normalization, then contiguous copies.
void av1_init_wedge_masks() {
  init_wedge_master_masks();
  init_wedge_signs();
  init_wedge_masks();
}

650
651
#if CONFIG_SUPERTX
// SUPERTX path: blends src0/src1 into dst through the compound mask, reading
// the mask at an extended offset (wedge_offset_x/y) within the master mask so
// the wedge pattern lines up across the supertx super-block.
static void build_masked_compound_wedge_extend(
    uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
    const uint8_t *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
    int wedge_offset_x, int wedge_offset_y, int h, int w) {
  // subh/subw flag chroma subsampling, derived from h/w vs. the luma size.
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
  const uint8_t *mask;
  size_t mask_stride;
  switch (comp_data->interinter_compound_type) {
    case COMPOUND_WEDGE:
      mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
                               sb_type, wedge_offset_x, wedge_offset_y);
      mask_stride = MASK_MASTER_STRIDE;
      break;
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG:
      mask = comp_data->seg_mask;
      mask_stride = block_size_wide[sb_type];
      break;
#endif
    default: assert(0); return;
  }
  aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
                     mask, (int)mask_stride, h, w, subh, subw);
}

678
#if CONFIG_HIGHBITDEPTH
679
static void build_masked_compound_wedge_extend_highbd(
clang-format's avatar
clang-format committed
680
    uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
681
682
683
    const uint8_t *src1_8, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
    int wedge_offset_x, int wedge_offset_y, int h, int w, int bd) {
684
685
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
686
687
  const uint8_t *mask;
  size_t mask_stride;
688
  switch (comp_data->interinter_compound_type) {
689
690
691
692
693
694
695
696
697
698
699
700
701
    case COMPOUND_WEDGE:
      mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
                               sb_type, wedge_offset_x, wedge_offset_y);
      mask_stride = MASK_MASTER_STRIDE;
      break;
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG:
      mask = comp_data->seg_mask;
      mask_stride = block_size_wide[sb_type];
      break;
#endif
    default: assert(0); return;
  }
Yaowu Xu's avatar
Yaowu Xu committed
702
  aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
703
704
                            src1_stride, mask, (int)mask_stride, h, w, subh,
                            subw, bd);
705
}
706
#endif  // CONFIG_HIGHBITDEPTH
707
#else
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
#if CONFIG_CONVOLVE_ROUND
// Blend two 32-bit convolve accumulators with the block's compound-type
// mask while staying in the unrounded CONV_BUF_TYPE domain
// (CONFIG_CONVOLVE_ROUND path); final rounding happens later.
static void build_masked_compound_no_round(
    CONV_BUF_TYPE *dst, int dst_stride, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w) {
  // Chroma subsampling is derived from h and w versus the nominal block
  // size. May be refactored to pass in subsampling factors directly.
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const uint8_t *const blend_mask =
      av1_get_compound_type_mask(comp_data, sb_type);
  aom_blend_a64_d32_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
                         blend_mask, block_size_wide[sb_type], h, w, subh,
                         subw);
}
#endif  // CONFIG_CONVOLVE_ROUND
723
724
725
726
727
// Blend two 8-bit predictors into dst using the block's compound-type mask
// (wedge or segmentation), at the block's own position (non-SUPERTX path).
static void build_masked_compound(
    uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
    const uint8_t *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w) {
  // Chroma subsampling is derived from h and w versus the nominal block
  // size. May be refactored to pass in subsampling factors directly.
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const uint8_t *const blend_mask =
      av1_get_compound_type_mask(comp_data, sb_type);
  aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
                     blend_mask, block_size_wide[sb_type], h, w, subh, subw);
}

737
#if CONFIG_HIGHBITDEPTH
738
static void build_masked_compound_highbd(
clang-format's avatar
clang-format committed
739
    uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
740
741
742
    const uint8_t *src1_8, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, int bd) {
743
744
745
746
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
747
748
749
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  // const uint8_t *mask =
  //     av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
750
751
752
  aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
                            src1_stride, mask, block_size_wide[sb_type], h, w,
                            subh, subw, bd);
753
}
754
#endif  // CONFIG_HIGHBITDEPTH
755
#endif  // CONFIG_SUPERTX
756

Yaowu Xu's avatar
Yaowu Xu committed
757
758
759
760
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                     uint8_t *dst, int dst_stride,
                                     const int subpel_x, const int subpel_y,
                                     const struct scale_factors *sf, int w,
761
                                     int h, ConvolveParams *conv_params,
762
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
763
                                     const InterpFilter *interp_filter,
764
#else
James Zern's avatar
James Zern committed
765
                                     const InterpFilter interp_filter,
766
#endif
Yaowu Xu's avatar
Yaowu Xu committed
767
                                     int xs, int ys,
768
#if CONFIG_SUPERTX
Yaowu Xu's avatar
Yaowu Xu committed
769
                                     int wedge_offset_x, int wedge_offset_y,
770
#endif  // CONFIG_SUPERTX
771
                                     int plane,
772
773
774
775
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     const WarpTypesAllowed *warp_types,
                                     int p_col, int p_row, int ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
776
                                     MACROBLOCKD *xd) {
777
778
  const MODE_INFO *mi = xd->mi[0];

779
780
781
782
783
784
785
786
787
788
789
  const INTERINTER_COMPOUND_DATA comp_data = {
#if CONFIG_WEDGE
    mi->mbmi.wedge_index,
    mi->mbmi.wedge_sign,
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    mi->mbmi.mask_type,
    xd->seg_mask,
#endif  // CONFIG_COMPOUND_SEGMENT
    mi->mbmi.interinter_compound_type
  };
790

791
#if CONFIG_HIGHBITDEPTH
792
793
#if CONFIG_CONVOLVE_ROUND
  DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
794
  memset(tmp_dst2, 0, sizeof(tmp_dst2));
795
796
797
798
799
800
801
802
803
804
  int tmp_dst2_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = conv_params->dst;
  int org_dst_stride = conv_params->dst_stride;
  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
    conv_params->dst = tmp_dst2;
    conv_params->dst_stride = tmp_dst2_stride;
    // mask compound has its own average mechanism
    conv_params->do_average = 0;
  }
#endif  // CONFIG_CONVOLVE_ROUND
805
  DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
clang-format's avatar
clang-format committed
806
807
808
  uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
                         ? CONVERT_TO_BYTEPTR(tmp_dst_)
                         : tmp_dst_;
Yaowu Xu's avatar
Yaowu Xu committed
809
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
810
                           subpel_y, sf, w, h, conv_params, interp_filter,
811
812
813
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                           warp_types, p_col, p_row, plane, ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
814
815
816
#if CONFIG_MOTION_VAR
                           0, 0,
#endif
817
                           xs, ys, xd);
818
#if CONFIG_COMPOUND_SEGMENT
819
  if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
820
#if CONFIG_CONVOLVE_ROUND
821
822
823
824
825
826
    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
      build_compound_seg_mask_d32(comp_data.seg_mask, comp_data.mask_type,
                                  org_dst, org_dst_stride, tmp_dst2,
                                  tmp_dst2_stride, mi->mbmi.sb_type, h, w,
                                  conv_params, xd->bd);
    } else {
827
#endif  // CONFIG_CONVOLVE_ROUND
828
829
830
831
832
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
                                       dst, dst_stride, tmp_dst, MAX_SB_SIZE,
                                       mi->mbmi.sb_type, h, w, xd->bd);
      } else {
833
834
835
836
        build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
                                dst_stride, tmp_dst, MAX_SB_SIZE,
                                mi->mbmi.sb_type, h, w);
      }
837
#if CONFIG_CONVOLVE_ROUND
838
    }
839
#endif
840
841
  }
#endif  // CONFIG_COMPOUND_SEGMENT
842
843
844
845

#if CONFIG_SUPERTX
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    build_masked_compound_wedge_extend_highbd(
846
        dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
847
848
849
        mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
  else
    build_masked_compound_wedge_extend(
850
        dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
851
852
        mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
#else
853
#if CONFIG_CONVOLVE_ROUND
854
855
856
857
858
859
860
861
862
863
  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
    build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
                                   org_dst_stride, tmp_dst2, tmp_dst2_stride,
                                   &comp_data, mi->mbmi.sb_type, h, w);
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      av1_highbd_convolve_rounding(
          org_dst, org_dst_stride, dst, dst_stride, w, h,
          FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1,
          xd->bd);
    } else {
864
865
866
      av1_convolve_rounding(
          org_dst, org_dst_stride, dst, dst_stride, w, h,
          FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
867
868
869
    }
    conv_params->do_post_rounding = 0;
  } else {
870
#endif  // CONFIG_CONVOLVE_ROUND
871
872
873
874
875
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
                                   MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
                                   w, xd->bd);
    } else {
876
877
878
      build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
                            MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
    }
879
#if CONFIG_CONVOLVE_ROUND
880
  }
881
#endif  // CONFIG_CONVOLVE_ROUND
882
#endif  // CONFIG_SUPERTX
883

884
#else  // CONFIG_HIGHBITDEPTH
885
886
887
888
889
890
891
892
893
894
895
896
897
898

#if CONFIG_CONVOLVE_ROUND
  DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
  int tmp_dst2_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = conv_params->dst;
  int org_dst_stride = conv_params->dst_stride;
  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
    memset(tmp_dst2, 0, sizeof(tmp_dst2));
    conv_params->dst = tmp_dst2;
    conv_params->dst_stride = tmp_dst2_stride;
    // mask compound has its own average mechanism
    conv_params->do_average = 0;
  }
#endif
899
  DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
Yaowu Xu's avatar
Yaowu Xu committed
900
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
901
                           subpel_y, sf, w, h, conv_params, interp_filter,
902
903
904
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                           warp_types, p_col, p_row, plane, ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
905
906
907
#if CONFIG_MOTION_VAR
                           0, 0,
#endif
908
                           xs, ys, xd);
909
#if CONFIG_COMPOUND_SEGMENT
910
911
912
913
914
  if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
#if CONFIG_CONVOLVE_ROUND
    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
      build_compound_seg_mask_d32(
          comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
915
          tmp_dst2, tmp_dst2_stride, mi->mbmi.sb_type, h, w, conv_params, 8);
916
917
918
919
920
921
922
923
924
    } else {
#endif  // CONFIG_CONVOLVE_ROUND
      build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
                              dst_stride, tmp_dst, MAX_SB_SIZE,
                              mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
    }
#endif
  }
925
#endif  // CONFIG_COMPOUND_SEGMENT
926
927
#if CONFIG_SUPERTX
  build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst,
928
                                     MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type,
929
930
                                     wedge_offset_x, wedge_offset_y, h, w);
#else
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
#if CONFIG_CONVOLVE_ROUND
  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
    build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
                                   org_dst_stride, tmp_dst2, tmp_dst2_stride,
                                   &comp_data, mi->mbmi.sb_type, h, w);
    av1_convolve_rounding(
        org_dst, org_dst_stride, dst, dst_stride, w, h,
        FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
    conv_params->do_post_rounding = 0;
  } else {
#endif  // CONFIG_CONVOLVE_ROUND
    build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
                          MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
  }
#endif  // CONFIG_CONVOLVE_ROUND
947
#endif  // CONFIG_SUPERTX
948
#endif  // CONFIG_HIGHBITDEPTH
949
950
951
#if CONFIG_COMPOUND_SEGMENT
  (void)plane;
#endif  // CONFIG_COMPOUND_SEGMENT
952
953
}
#endif  // CONFIG_EXT_INTER
954

955
956
957
// TODO(sarahparker) av1_highbd_build_inter_predictor and
// av1_build_inter_predictor should be combined with
// av1_make_inter_predictor
958
#if CONFIG_HIGHBITDEPTH
959
960
961
void av1_highbd_build_inter_predictor(
    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
    const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
962
#if CONFIG_DUAL_FILTER
963
    const InterpFilter *interp_filter,
964
#else
965
    const InterpFilter interp_filter,
966
#endif
967
968
969
970
971
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    const WarpTypesAllowed *warp_types, int p_col, int p_row,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    int plane, enum mv_precision precision, int x, int y,
    const MACROBLOCKD *xd) {
Jingning Han's avatar
Jingning Han committed
972
973
974
  const int is_q4 = precision == MV_PRECISION_Q4;
  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                     is_q4 ? src_mv->col : src_mv->col * 2 };
Yaowu Xu's avatar
Yaowu Xu committed
975
  MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
Fergus Simpson's avatar
Fergus Simpson committed
976
977
  mv.col += SCALE_EXTRA_OFF;
  mv.row += SCALE_EXTRA_OFF;
978
979
  const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
  const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
980
  ConvolveParams conv_params = get_conv_params(ref, ref, plane);
Jingning Han's avatar
Jingning Han committed
981

982
983
  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
         (mv.col >> SCALE_SUBPEL_BITS);
Jingning Han's avatar
Jingning Han committed
984

985
986
  av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
                           sf, w, h, &conv_params, interp_filter,
987
988
989
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                           warp_types, p_col, p_row, plane, ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
990
991
992
#if CONFIG_MOTION_VAR
                           0, 0,
#endif
993
994
                           sf->x_step_q4, sf->y_step_q4, xd);
}
995
#endif  // CONFIG_HIGHBITDEPTH
996

Yaowu Xu's avatar
Yaowu Xu committed
997
998
999
void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                               int dst_stride, const MV *src_mv,
                               const struct scale_factors *sf, int w, int h,
1000
                               ConvolveParams *conv_params,
1001
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
1002
                               const InterpFilter *interp_filter,
1003
#else
James Zern's avatar
James Zern committed
1004
                               const InterpFilter interp_filter,
1005
#endif
1006
1007
1008
1009
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                               const WarpTypesAllowed *warp_types, int p_col,
                               int p_row, int plane, int ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1010
1011
                               enum mv_precision precision, int x, int y,
                               const MACROBLOCKD *xd) {
Jingning Han's avatar
Jingning Han committed
1012
1013
1014
  const int is_q4 = precision == MV_PRECISION_Q4;
  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                     is_q4 ? src_mv->col : src_mv->col * 2 };
Yaowu Xu's avatar
Yaowu Xu committed
1015
  MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
Fergus Simpson's avatar
Fergus Simpson committed
1016
1017
  mv.col += SCALE_EXTRA_OFF;
  mv.row += SCALE_EXTRA_OFF;
1018
1019
  const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
  const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
Jingning Han's avatar
Jingning Han committed
1020

1021
1022
  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
         (mv.col >> SCALE_SUBPEL_BITS);
Jingning Han's avatar
Jingning Han committed
1023

1024
1025
  av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
                           sf, w, h, conv_params, interp_filter,
1026
1027
1028
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                           warp_types, p_col, p_row, plane, ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION