reconinter.c 135 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4
5
6
7
8
9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10
11
12
13
 */

#include <assert.h>

Yaowu Xu's avatar
Yaowu Xu committed
14
15
16
#include "./aom_scale_rtcd.h"
#include "./aom_dsp_rtcd.h"
#include "./aom_config.h"
Jingning Han's avatar
Jingning Han committed
17

Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom/aom_integer.h"
19
#include "aom_dsp/blend.h"
Jingning Han's avatar
Jingning Han committed
20

21
22
23
#include "av1/common/blockd.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
Yue Chen's avatar
Yue Chen committed
24
#if CONFIG_MOTION_VAR
25
#include "av1/common/onyxc_int.h"
Yue Chen's avatar
Yue Chen committed
26
#endif  // CONFIG_MOTION_VAR
Jingning Han's avatar
Jingning Han committed
27

28
#if CONFIG_EXT_INTER
29

clang-format's avatar
clang-format committed
30
#define NSMOOTHERS 1
31

32
// Master wedge mask storage, indexed [smoother][negative][direction].
// Index [0] of the second dimension holds the master weights and [1] the
// complement (filled in by init_wedge_master_masks()).
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
                              [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// Per-block-size, per-wedge sign-flip flags, computed by init_wedge_signs().
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);

// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Pointers into wedge_mask_buf for each block size, normal and inverted;
// populated by init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
46

47
// Some unused wedge codebooks left temporarily to facilitate experiments.
48
49
// To be removed when settled.
/*
50
static wedge_code_type wedge_codebook_8_hgtw[8] = {
clang-format's avatar
clang-format committed
51
52
53
54
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
55
56
};

57
static wedge_code_type wedge_codebook_8_hltw[8] = {
clang-format's avatar
clang-format committed
58
59
60
61
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
62
63
};

64
static wedge_code_type wedge_codebook_8_heqw[8] = {
clang-format's avatar
clang-format committed
65
66
67
68
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
69
};
70
71

static const wedge_code_type wedge_codebook_32_hgtw[32] = {
clang-format's avatar
clang-format committed
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
88
89
};

90
static const wedge_code_type wedge_codebook_32_hltw[32] = {
clang-format's avatar
clang-format committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
107
108
};

109
static const wedge_code_type wedge_codebook_32_heqw[32] = {
clang-format's avatar
clang-format committed
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
126
};
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
*/

// 16-entry wedge codebooks, chosen per block aspect ratio:
//   hgtw: height > width, hltw: height < width, heqw: height == width.
// Each entry is { boundary direction, x_offset, y_offset }, with offsets
// expressed in eighths of the block dimensions (see get_wedge_mask_inplace).
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
161

162
const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
163
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
164
165
166
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
167
#endif  // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
clang-format's avatar
clang-format committed
168
169
170
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
171
#if CONFIG_WEDGE
172
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
173
    wedge_masks[BLOCK_8X8] },
174
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
175
    wedge_masks[BLOCK_8X16] },
176
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
177
    wedge_masks[BLOCK_16X8] },
178
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
179
    wedge_masks[BLOCK_16X16] },
180
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
181
    wedge_masks[BLOCK_16X32] },
182
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
183
    wedge_masks[BLOCK_32X16] },
184
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
185
    wedge_masks[BLOCK_32X32] },
186
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
187
    wedge_masks[BLOCK_32X64] },
188
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
189
    wedge_masks[BLOCK_64X32] },
190
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
191
192
    wedge_masks[BLOCK_64X64] },
#else
193
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
194
    wedge_masks[BLOCK_8X8] },
195
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
196
    wedge_masks[BLOCK_8X16] },
197
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
198
    wedge_masks[BLOCK_16X8] },
199
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
200
    wedge_masks[BLOCK_16X16] },
201
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
202
    wedge_masks[BLOCK_16X32] },
203
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
204
    wedge_masks[BLOCK_32X16] },
205
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
206
    wedge_masks[BLOCK_32X32] },
207
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
208
    wedge_masks[BLOCK_32X64] },
209
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
210
    wedge_masks[BLOCK_64X32] },
211
  { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
212
    wedge_masks[BLOCK_64X64] },
213
#endif  // CONFIG_WEDGE
Debargha Mukherjee's avatar
Debargha Mukherjee committed
214
#if CONFIG_EXT_PARTITION
clang-format's avatar
clang-format committed
215
216
217
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
Debargha Mukherjee's avatar
Debargha Mukherjee committed
218
#endif  // CONFIG_EXT_PARTITION
219
220
221
222
223
224
225
226
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
    wedge_masks[BLOCK_4X16] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
    wedge_masks[BLOCK_16X4] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 0,
    wedge_masks[BLOCK_8X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 0,
    wedge_masks[BLOCK_32X8] },
227
228
229
230
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X64], 0,
    wedge_masks[BLOCK_8X32] },
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X16], 0,
    wedge_masks[BLOCK_32X8] },
231
};
232

clang-format's avatar
clang-format committed
233
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
234
                                             BLOCK_SIZE sb_type) {
Debargha Mukherjee's avatar
Debargha Mukherjee committed
235
  const uint8_t *master;
236
237
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
238
239
240
  const wedge_code_type *a =
      wedge_params_lookup[sb_type].codebook + wedge_index;
  const int smoother = wedge_params_lookup[sb_type].smoother;
241
  int woff, hoff;
242
243
244
245
246
247
248
  const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 &&
         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] +
clang-format's avatar
clang-format committed
249
250
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
Debargha Mukherjee's avatar
Debargha Mukherjee committed
251
252
253
  return master;
}

Yaowu Xu's avatar
Yaowu Xu committed
254
255
256
// Returns the soft wedge mask for the given wedge/sign/block size, shifted
// back by (offset_x, offset_y) within the master mask plane, or NULL when
// no mask is available.
const uint8_t *av1_get_soft_mask(int wedge_index, int wedge_sign,
                                 BLOCK_SIZE sb_type, int offset_x,
                                 int offset_y) {
  const uint8_t *base =
      get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
  if (base == NULL) return NULL;
  return base - (offset_x + offset_y * MASK_MASTER_STRIDE);
}

263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#if CONFIG_COMPOUND_SEGMENT
// Writes the complement of `mask` (AOM_BLEND_A64_MAX_ALPHA - value) into
// mask_inv_buffer, h rows by w columns at the given stride, and returns
// mask_inv_buffer.
static uint8_t *invert_mask(uint8_t *mask_inv_buffer, const uint8_t *const mask,
                            int h, int w, int stride) {
  int r, c;
  for (r = 0; r < h; ++r) {
    const uint8_t *const src_row = mask + r * stride;
    uint8_t *const dst_row = mask_inv_buffer + r * stride;
    for (c = 0; c < w; ++c) dst_row[c] = AOM_BLEND_A64_MAX_ALPHA - src_row[c];
  }
  return mask_inv_buffer;
}
#endif  // CONFIG_COMPOUND_SEGMENT

// Returns the complement of the blending mask selected by comp_data.
// For COMPOUND_WEDGE this is the same wedge with the sign inverted; for
// COMPOUND_SEG the complement is computed into the caller-provided
// mask_buffer. Returns NULL (after asserting) for unsupported types.
const uint8_t *av1_get_compound_type_mask_inverse(
    const INTERINTER_COMPOUND_DATA *const comp_data,
#if CONFIG_COMPOUND_SEGMENT
    uint8_t *mask_buffer, int h, int w, int stride,
#endif
    BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->interinter_compound_type));
  // sb_type may be unused when the wedge case is compiled out.
  (void)sb_type;
  switch (comp_data->interinter_compound_type) {
#if CONFIG_WEDGE
    case COMPOUND_WEDGE:
      // Note the negated wedge sign: same wedge shape, opposite side.
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          !comp_data->wedge_sign, sb_type);
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG:
      return invert_mask(mask_buffer, comp_data->seg_mask, h, w, stride);
#endif  // CONFIG_COMPOUND_SEGMENT
    default: assert(0); return NULL;
  }
}
298

299
300
// Returns the blending mask selected by comp_data for the given block size:
// the precomputed contiguous wedge mask for COMPOUND_WEDGE, or the stored
// segmentation mask for COMPOUND_SEG. Returns NULL (after asserting) for
// unsupported types.
const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->interinter_compound_type));
  // sb_type may be unused when the wedge case is compiled out.
  (void)sb_type;
  switch (comp_data->interinter_compound_type) {
#if CONFIG_WEDGE
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG: return comp_data->seg_mask;
#endif  // CONFIG_COMPOUND_SEGMENT
    default: assert(0); return NULL;
  }
}

#if CONFIG_COMPOUND_SEGMENT
317
318
319
#if COMPOUND_SEGMENT_TYPE == 0
// Fills an h x w region of `mask` (stride = block width of sb_type) with a
// constant weight: mask_val, or its complement when which_inverse is set.
static void uniform_mask(uint8_t *mask, int which_inverse, BLOCK_SIZE sb_type,
                         int h, int w, int mask_val) {
  const int stride = block_size_wide[sb_type];
  // The written value is loop-invariant; compute it once.
  const uint8_t val =
      (uint8_t)(which_inverse ? AOM_BLEND_A64_MAX_ALPHA - mask_val : mask_val);
  int r, c;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) mask[r * stride + c] = val;
  }
}

// Builds the compound segmentation mask for COMPOUND_SEGMENT_TYPE == 0:
// a uniform 45 weight (or its complement). The source pixels are unused
// in this mode.
void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
                             const uint8_t *src0, int src0_stride,
                             const uint8_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w) {
  (void)src0;
  (void)src1;
  (void)src0_stride;
  (void)src1_stride;
  if (mask_type == UNIFORM_45) {
    uniform_mask(mask, 0, sb_type, h, w, 45);
  } else if (mask_type == UNIFORM_45_INV) {
    uniform_mask(mask, 1, sb_type, h, w, 45);
  } else {
    assert(0);
  }
}
343

344
#if CONFIG_HIGHBITDEPTH
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
// High-bitdepth variant for COMPOUND_SEGMENT_TYPE == 0. The mask is uniform,
// so neither the source pixels nor the bit depth influence the result.
void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                    const uint8_t *src0, int src0_stride,
                                    const uint8_t *src1, int src1_stride,
                                    BLOCK_SIZE sb_type, int h, int w, int bd) {
  (void)src0;
  (void)src1;
  (void)src0_stride;
  (void)src1_stride;
  (void)bd;
  if (mask_type == UNIFORM_45) {
    uniform_mask(mask, 0, sb_type, h, w, 45);
  } else if (mask_type == UNIFORM_45_INV) {
    uniform_mask(mask, 1, sb_type, h, w, 45);
  } else {
    assert(0);
  }
}
360
#endif  // CONFIG_HIGHBITDEPTH
361
362
363

#elif COMPOUND_SEGMENT_TYPE == 1
#define DIFF_FACTOR 16
364
365
366
367
368
369

#if CONFIG_CONVOLVE_ROUND
static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
                             const int32_t *src0, int src0_stride,
                             const int32_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w,
370
371
372
                             ConvolveParams *conv_params, int bd) {
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
373
374
375
376
  int i, j, m, diff;
  int block_stride = block_size_wide[sb_type];
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
377
378
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
379
380
381
382
383
384
385
386
387
388
389
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * block_stride + j] =
          which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

static void build_compound_seg_mask_d32(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                        const int32_t *src0, int src0_stride,
                                        const int32_t *src1, int src1_stride,
                                        BLOCK_SIZE sb_type, int h, int w,
390
                                        ConvolveParams *conv_params, int bd) {
391
392
393
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride,
394
                       sb_type, h, w, conv_params, bd);
395
396
397
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d32(mask, 1, 38, src0, src0_stride, src1, src1_stride,
398
                       sb_type, h, w, conv_params, bd);
399
400
401
402
403
404
      break;
    default: assert(0);
  }
}
#endif

405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
// Builds a difference-weighted mask from two 8-bit predictors: larger
// |src0 - src1| yields a weight further from mask_base, clamped to
// [0, AOM_BLEND_A64_MAX_ALPHA]; which_inverse selects the complement.
static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
                         const uint8_t *src0, int src0_stride,
                         const uint8_t *src1, int src1_stride,
                         BLOCK_SIZE sb_type, int h, int w) {
  const int stride = block_size_wide[sb_type];
  int r, c;
  for (r = 0; r < h; ++r) {
    const uint8_t *const p0 = src0 + r * src0_stride;
    const uint8_t *const p1 = src1 + r * src1_stride;
    uint8_t *const out = mask + r * stride;
    for (c = 0; c < w; ++c) {
      const int d = abs((int)p0[c] - (int)p1[c]);
      const int wt =
          clamp(mask_base + (d / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      out[c] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - wt : wt;
    }
  }
}

// Builds the compound segmentation mask (COMPOUND_SEGMENT_TYPE == 1) from
// two 8-bit predictors via diffwtd_mask, base weight 38, normal or inverted
// per mask_type.
void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
                             const uint8_t *src0, int src0_stride,
                             const uint8_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, sb_type,
                 h, w);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, sb_type,
                 h, w);
  } else {
    assert(0);
  }
}

439
#if CONFIG_HIGHBITDEPTH
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
// High-bitdepth variant of diffwtd_mask: the pixel difference is scaled down
// by (bd - 8) bits so the weighting matches the 8-bit path.
static void diffwtd_mask_highbd(uint8_t *mask, int which_inverse, int mask_base,
                                const uint16_t *src0, int src0_stride,
                                const uint16_t *src1, int src1_stride,
                                BLOCK_SIZE sb_type, int h, int w, int bd) {
  const int stride = block_size_wide[sb_type];
  int r, c;
  for (r = 0; r < h; ++r) {
    const uint16_t *const p0 = src0 + r * src0_stride;
    const uint16_t *const p1 = src1 + r * src1_stride;
    uint8_t *const out = mask + r * stride;
    for (c = 0; c < w; ++c) {
      const int d = abs((int)p0[c] - (int)p1[c]) >> (bd - 8);
      const int wt =
          clamp(mask_base + (d / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      out[c] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - wt : wt;
    }
  }
}

// High-bitdepth compound segmentation mask (COMPOUND_SEGMENT_TYPE == 1):
// converts the packed pointers to 16-bit and dispatches to
// diffwtd_mask_highbd with base weight 38.
void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                    const uint8_t *src0, int src0_stride,
                                    const uint8_t *src1, int src1_stride,
                                    BLOCK_SIZE sb_type, int h, int w, int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
                        bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
                        bd);
  } else {
    assert(0);
  }
}
476
#endif  // CONFIG_HIGHBITDEPTH
477
#endif  // COMPOUND_SEGMENT_TYPE
478
#endif  // CONFIG_COMPOUND_SEGMENT
479

480
481
482
483
#if MASK_MASTER_SIZE == 64
// Prototype rows for the 64-wide master wedge masks. Values are blend
// weights in [0, 64]; each row ramps from 0 to 64 across the boundary.
// The odd/even variants are interleaved per row by init_wedge_master_masks.
static const uint8_t wedge_master_oblique_odd[NSMOOTHERS][MASK_MASTER_SIZE] = {
  {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
      37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  }
};
static const uint8_t wedge_master_oblique_even[NSMOOTHERS][MASK_MASTER_SIZE] = {
  {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
      46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  }
};
static const uint8_t wedge_master_vertical[NSMOOTHERS][MASK_MASTER_SIZE] = { {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
    43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
} };

// Copies `width` bytes from src into dst, shifted by `shift` positions
// (positive shifts right, negative shifts left); the bytes uncovered by the
// shift are filled with the nearest edge byte of src.
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
  if (shift < 0) {
    const int n = -shift;
    memcpy(dst, src + n, width - n);
    memset(dst + width - n, src[width - 1], n);
  } else {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  }
}
#else
// Width of the tanh() ramp used to synthesize the master masks when
// MASK_MASTER_SIZE != 64 (see init_wedge_master_masks).
static const double smoother_param[NSMOOTHERS] = { 3.0 };
#endif  // MASK_MASTER_SIZE == 64

518
// Builds the master wedge masks in wedge_mask_obl. The WEDGE_OBLIQUE63 and
// WEDGE_VERTICAL masters are generated first (by shifting the prototype rows
// when MASK_MASTER_SIZE == 64, or analytically via tanh() otherwise); the
// remaining directions are then derived by transposing / mirroring, and the
// [1] index is filled with the complement of the [0] masters.
static void init_wedge_master_masks() {
  int i, j, s;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  for (s = 0; s < NSMOOTHERS; s++) {
// Note: index [0] stores the masters, and [1] its complement.
#if MASK_MASTER_SIZE == 64
    // Generate prototype by shifting the masters
    int shift = h / 4;
    for (i = 0; i < h; i += 2) {
      // Alternate even/odd prototype rows, moving the ramp left by one
      // position every two rows to form the oblique boundary.
      shift_copy(wedge_master_oblique_even[s],
                 &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride], shift,
                 MASK_MASTER_SIZE);
      shift--;
      shift_copy(wedge_master_oblique_odd[s],
                 &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][(i + 1) * stride],
                 shift, MASK_MASTER_SIZE);
      // The vertical master repeats the same row everywhere.
      memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride],
             wedge_master_vertical[s],
             MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
      memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][(i + 1) * stride],
             wedge_master_vertical[s],
             MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
    }
#else
    // Analytic fallback: a tanh() ramp across the line a[0]*x + a[1]*y = 0,
    // with (x, y) measured from the mask center in half-pixel units.
    const int a[2] = { 2, 1 };
    const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; ++j) {
        int x = (2 * j + 1 - w);
        int y = (2 * i + 1 - h);
        double d = (a[0] * x + a[1] * y) / asqrt;
        const int msk = (int)rint((1.0 + tanh(d / smoother_param[s])) * 32);
        wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = msk;
        const int mskx = (int)rint((1.0 + tanh(x / smoother_param[s])) * 32);
        wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = mskx;
      }
    }
#endif  // MASK_MASTER_SIZE == 64
    // Derive the other directions and the complements from the two masters:
    // OBLIQUE27 is the transpose of OBLIQUE63, OBLIQUE117/153 its mirrored
    // complements, and HORIZONTAL the transpose of VERTICAL.
    for (i = 0; i < h; ++i) {
      for (j = 0; j < w; ++j) {
        const int msk = wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j];
        wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = msk;
        wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
            wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - msk;
        wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
            wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - msk;
        wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
            wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
                msk;
        const int mskx = wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j];
        wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
        wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
            wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - mskx;
      }
    }
  }
}

// If the signs for the wedges for various blocksizes are
// inconsistent flip the sign flag. Do it only once for every
// wedge codebook.
static void init_wedge_signs() {
  BLOCK_SIZE sb_type;
  memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
    const int bw = block_size_wide[sb_type];
    const int bh = block_size_high[sb_type];
    const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
    const int wbits = wedge_params.bits;
    const int wtypes = 1 << wbits;
    int i, w;
    // Block sizes with no wedge bits have no masks to sign-normalize.
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      // Get the mask master, i.e. index [0]
      const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
      // Average the mask over its top row and left column only (a cheap
      // proxy for the whole-mask average).
      int avg = 0;
      for (i = 0; i < bw; ++i) avg += mask[i];
      for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
      avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
      // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
      // If default sign is 1:
      //   If sign requested is 0, we need to flip the sign and return
      //   the complement i.e. index [1] instead. If sign requested is 1
      //   we need to flip the sign and return index [0] instead.
      // If default sign is 0:
      //   If sign requested is 0, we need to return index [0] the master
      //   if sign requested is 1, we need to return the complement index [1]
      //   instead.
      wedge_params.signflip[w] = (avg < 32);
    }
  }
}

// Copies each wedge mask (both signs) out of the master plane into the
// contiguous wedge_mask_buf, recording the per-wedge pointers in
// wedge_masks via wedge_params->masks. Must run after init_wedge_signs(),
// since get_wedge_mask_inplace() consults the signflip table.
static void init_wedge_masks() {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
    const int wbits = wedge_params->bits;
    const int wtypes = 1 << wbits;
    int w;
    // Skip block sizes that do not support wedge prediction.
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      // Sign 0 mask: repack from master stride to a contiguous bw x bh block.
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      // Sign 1 (complement) mask.
      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}

// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// One-time initialization of all wedge tables. Order matters: the master
// masks must exist before the signs are derived from them, and the signs
// must be final before the per-block masks are packed.
void av1_init_wedge_masks() {
  init_wedge_master_masks();
  init_wedge_signs();
  init_wedge_masks();
}

654
655
#if CONFIG_SUPERTX
// Blends src0 and src1 into dst using the compound mask selected by
// comp_data, with the mask origin shifted by (wedge_offset_x,
// wedge_offset_y) for SUPERTX extended blocks. subw/subh flag blocks whose
// w/h are half the nominal block dimensions, so the blend subsamples the
// mask accordingly.
static void build_masked_compound_wedge_extend(
    uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
    const uint8_t *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
    int wedge_offset_x, int wedge_offset_y, int h, int w) {
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
  const uint8_t *mask;
  size_t mask_stride;
  switch (comp_data->interinter_compound_type) {
    case COMPOUND_WEDGE:
      mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
                               sb_type, wedge_offset_x, wedge_offset_y);
      mask_stride = MASK_MASTER_STRIDE;
      break;
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG:
      mask = comp_data->seg_mask;
      mask_stride = block_size_wide[sb_type];
      break;
#endif
    default: assert(0); return;
  }
  aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
                     mask, (int)mask_stride, h, w, subh, subw);
}

682
#if CONFIG_HIGHBITDEPTH
683
static void build_masked_compound_wedge_extend_highbd(
clang-format's avatar
clang-format committed
684
    uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
685
686
687
    const uint8_t *src1_8, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
    int wedge_offset_x, int wedge_offset_y, int h, int w, int bd) {
688
689
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
690
691
  const uint8_t *mask;
  size_t mask_stride;
692
  switch (comp_data->interinter_compound_type) {
693
694
695
696
697
698
699
700
701
702
703
704
705
    case COMPOUND_WEDGE:
      mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
                               sb_type, wedge_offset_x, wedge_offset_y);
      mask_stride = MASK_MASTER_STRIDE;
      break;
#if CONFIG_COMPOUND_SEGMENT
    case COMPOUND_SEG:
      mask = comp_data->seg_mask;
      mask_stride = block_size_wide[sb_type];
      break;
#endif
    default: assert(0); return;
  }
Yaowu Xu's avatar
Yaowu Xu committed
706
  aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
707
708
                            src1_stride, mask, (int)mask_stride, h, w, subh,
                            subw, bd);
709
}
710
#endif  // CONFIG_HIGHBITDEPTH
711
#else
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
#if CONFIG_CONVOLVE_ROUND
// Blends two intermediate-precision (no-round, 32-bit) convolve buffers into
// dst through the compound-type mask for this block.
static void build_masked_compound_no_round(
    CONV_BUF_TYPE *dst, int dst_stride, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w) {
  const uint8_t *const mask = av1_get_compound_type_mask(comp_data, sb_type);
  // Chroma subsampling is inferred from h/w versus the nominal sb_type
  // dimensions; may be refactored to pass the factors in directly.
  const int sub_y = (2 << b_height_log2_lookup[sb_type]) == h;
  const int sub_x = (2 << b_width_log2_lookup[sb_type]) == w;
  aom_blend_a64_d32_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
                         mask, block_size_wide[sb_type], h, w, sub_y, sub_x);
}
#endif  // CONFIG_CONVOLVE_ROUND
727
728
729
730
731
// Blends the two 8-bit predictions src0/src1 into dst through the
// compound-type mask (wedge or segmentation) for this block.
static void build_masked_compound(
    uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
    const uint8_t *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w) {
  const uint8_t *const mask = av1_get_compound_type_mask(comp_data, sb_type);
  // Chroma subsampling is inferred from h/w versus the nominal sb_type
  // dimensions; may be refactored to pass the factors in directly.
  const int sub_y = (2 << b_height_log2_lookup[sb_type]) == h;
  const int sub_x = (2 << b_width_log2_lookup[sb_type]) == w;
  aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
                     mask, block_size_wide[sb_type], h, w, sub_y, sub_x);
}

741
#if CONFIG_HIGHBITDEPTH
742
static void build_masked_compound_highbd(
clang-format's avatar
clang-format committed
743
    uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
744
745
746
    const uint8_t *src1_8, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, int bd) {
747
748
749
750
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
751
752
753
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  // const uint8_t *mask =
  //     av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
754
755
756
  aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
                            src1_stride, mask, block_size_wide[sb_type], h, w,
                            subh, subw, bd);
757
}
758
#endif  // CONFIG_HIGHBITDEPTH
759
#endif  // CONFIG_SUPERTX
760

Yaowu Xu's avatar
Yaowu Xu committed
761
762
763
764
// Builds a masked compound inter prediction (wedge or segmentation mask) for
// one plane. The second inter prediction is generated into a temporary
// buffer, a mask is selected (and, for COMPOUND_SEG, derived from the two
// predictions), and the temporary prediction is blended with the first
// prediction already present in dst. Two nearly parallel bodies exist: the
// CONFIG_HIGHBITDEPTH path (with 8/16-bit buffer switching) and the
// low-bitdepth path below it.
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                     uint8_t *dst, int dst_stride,
                                     const int subpel_x, const int subpel_y,
                                     const struct scale_factors *sf, int w,
                                     int h, ConvolveParams *conv_params,
#if CONFIG_DUAL_FILTER
                                     const InterpFilter *interp_filter,
#else
                                     const InterpFilter interp_filter,
#endif
                                     int xs, int ys,
#if CONFIG_SUPERTX
                                     int wedge_offset_x, int wedge_offset_y,
#endif  // CONFIG_SUPERTX
                                     int plane,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     const WarpTypesAllowed *warp_types,
                                     int p_col, int p_row, int ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                                     MACROBLOCKD *xd) {
  const MODE_INFO *mi = xd->mi[0];

  // Snapshot the compound parameters (wedge index/sign, segmentation mask
  // pointer, compound type) from the current mode info.
  const INTERINTER_COMPOUND_DATA comp_data = {
#if CONFIG_WEDGE
    mi->mbmi.wedge_index,
    mi->mbmi.wedge_sign,
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    mi->mbmi.mask_type,
    xd->seg_mask,
#endif  // CONFIG_COMPOUND_SEGMENT
    mi->mbmi.interinter_compound_type
  };

#if CONFIG_HIGHBITDEPTH
#if CONFIG_CONVOLVE_ROUND
  // In no-round mode, redirect the convolve output into a local 32-bit
  // buffer so blending can happen at intermediate precision; the original
  // destination is restored implicitly by using org_dst below.
  DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
  memset(tmp_dst2, 0, sizeof(tmp_dst2));
  int tmp_dst2_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = conv_params->dst;
  int org_dst_stride = conv_params->dst_stride;
  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
    conv_params->dst = tmp_dst2;
    conv_params->dst_stride = tmp_dst2_stride;
    // mask compound has its own average mechanism
    conv_params->do_average = 0;
  }
#endif  // CONFIG_CONVOLVE_ROUND
  // Temporary buffer for the second prediction; sized for 16-bit samples
  // and wrapped with CONVERT_TO_BYTEPTR when the frame is high bitdepth.
  DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
                         ? CONVERT_TO_BYTEPTR(tmp_dst_)
                         : tmp_dst_;
  // Generate the second inter prediction into tmp_dst.
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
                           subpel_y, sf, w, h, conv_params, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                           warp_types, p_col, p_row, plane, ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
                           0, 0,
#endif
                           xs, ys, xd);
#if CONFIG_COMPOUND_SEGMENT
  // The segmentation mask is derived from the two predictions and is only
  // computed for plane 0; other planes reuse xd->seg_mask.
  if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
#if CONFIG_CONVOLVE_ROUND
    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
      build_compound_seg_mask_d32(comp_data.seg_mask, comp_data.mask_type,
                                  org_dst, org_dst_stride, tmp_dst2,
                                  tmp_dst2_stride, mi->mbmi.sb_type, h, w,
                                  conv_params, xd->bd);
    } else {
#endif  // CONFIG_CONVOLVE_ROUND
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
                                       dst, dst_stride, tmp_dst, MAX_SB_SIZE,
                                       mi->mbmi.sb_type, h, w, xd->bd);
      } else {
        build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
                                dst_stride, tmp_dst, MAX_SB_SIZE,
                                mi->mbmi.sb_type, h, w);
      }
#if CONFIG_CONVOLVE_ROUND
    }
#endif
  }
#endif  // CONFIG_COMPOUND_SEGMENT

#if CONFIG_SUPERTX
  // SUPERTX: blend using the extended wedge mask at the given offset.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    build_masked_compound_wedge_extend_highbd(
        dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
        mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
  else
    build_masked_compound_wedge_extend(
        dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
        mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
#else
#if CONFIG_CONVOLVE_ROUND
  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
    // Blend at intermediate precision, then round down to pixel values.
    build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
                                   org_dst_stride, tmp_dst2, tmp_dst2_stride,
                                   &comp_data, mi->mbmi.sb_type, h, w);
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      av1_highbd_convolve_rounding(
          org_dst, org_dst_stride, dst, dst_stride, w, h,
          FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1,
          xd->bd);
    } else {
      av1_convolve_rounding(
          org_dst, org_dst_stride, dst, dst_stride, w, h,
          FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
    }
    // Rounding was applied here; suppress the downstream post-rounding pass.
    conv_params->do_post_rounding = 0;
  } else {
#endif  // CONFIG_CONVOLVE_ROUND
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
                                   MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
                                   w, xd->bd);
    } else {
      build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
                            MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
    }
#if CONFIG_CONVOLVE_ROUND
  }
#endif  // CONFIG_CONVOLVE_ROUND
#endif  // CONFIG_SUPERTX

#else  // CONFIG_HIGHBITDEPTH

  // Low-bitdepth-only build: same flow as above with plain 8-bit buffers.
#if CONFIG_CONVOLVE_ROUND
  DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
  int tmp_dst2_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = conv_params->dst;
  int org_dst_stride = conv_params->dst_stride;
  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
    memset(tmp_dst2, 0, sizeof(tmp_dst2));
    conv_params->dst = tmp_dst2;
    conv_params->dst_stride = tmp_dst2_stride;
    // mask compound has its own average mechanism
    conv_params->do_average = 0;
  }
#endif
  DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
  // Generate the second inter prediction into tmp_dst.
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
                           subpel_y, sf, w, h, conv_params, interp_filter,
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                           warp_types, p_col, p_row, plane, ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR
                           0, 0,
#endif
                           xs, ys, xd);
#if CONFIG_COMPOUND_SEGMENT
  // Derive the segmentation mask from the two predictions (plane 0 only).
  if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
#if CONFIG_CONVOLVE_ROUND
    if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
      build_compound_seg_mask_d32(
          comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
          tmp_dst2, tmp_dst2_stride, mi->mbmi.sb_type, h, w, conv_params, 8);
    } else {
#endif  // CONFIG_CONVOLVE_ROUND
      build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
                              dst_stride, tmp_dst, MAX_SB_SIZE,
                              mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
    }
#endif
  }
#endif  // CONFIG_COMPOUND_SEGMENT
#if CONFIG_SUPERTX
  build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst,
                                     MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type,
                                     wedge_offset_x, wedge_offset_y, h, w);
#else
#if CONFIG_CONVOLVE_ROUND
  if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
    // Blend at intermediate precision, then round down to pixel values.
    build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
                                   org_dst_stride, tmp_dst2, tmp_dst2_stride,
                                   &comp_data, mi->mbmi.sb_type, h, w);
    av1_convolve_rounding(
        org_dst, org_dst_stride, dst, dst_stride, w, h,
        FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
    conv_params->do_post_rounding = 0;
  } else {
#endif  // CONFIG_CONVOLVE_ROUND
    build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
                          MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
  }
#endif  // CONFIG_CONVOLVE_ROUND
#endif  // CONFIG_SUPERTX
#endif  // CONFIG_HIGHBITDEPTH
#if CONFIG_COMPOUND_SEGMENT
  (void)plane;  // silence unused-parameter warnings in some config combos
#endif  // CONFIG_COMPOUND_SEGMENT
}
#endif  // CONFIG_EXT_INTER
958

959
960
961
// TODO(sarahparker) av1_highbd_build_inter_predictor and
// av1_build_inter_predictor should be combined with
// av1_make_inter_predictor
962
#if CONFIG_HIGHBITDEPTH
963
964
965
void av1_highbd_build_inter_predictor(
    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
    const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
966
#if CONFIG_DUAL_FILTER
967
    const InterpFilter *interp_filter,
968
#else
969
    const InterpFilter interp_filter,
970
#endif
971
972
973
974
975
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    const WarpTypesAllowed *warp_types, int p_col, int p_row,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    int plane, enum mv_precision precision, int x, int y,
    const MACROBLOCKD *xd) {
Jingning Han's avatar
Jingning Han committed
976
977
978
  const int is_q4 = precision == MV_PRECISION_Q4;
  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                     is_q4 ? src_mv->col : src_mv->col * 2 };
Yaowu Xu's avatar
Yaowu Xu committed
979
  MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
Fergus Simpson's avatar
Fergus Simpson committed
980
981
  mv.col += SCALE_EXTRA_OFF;
  mv.row += SCALE_EXTRA_OFF;
982
983
  const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
  const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
984
  ConvolveParams conv_params = get_conv_params(ref, ref, plane);
Jingning Han's avatar
Jingning Han committed
985

986
987
  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
         (mv.col >> SCALE_SUBPEL_BITS);
Jingning Han's avatar
Jingning Han committed
988

989
990
  av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
                           sf, w, h, &conv_params, interp_filter,
991
992
993
#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
                           warp_types, p_col, p_row, plane, ref,
#endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
994
995
996
#if CONFIG_MOTION_VAR
                           0, 0,
#endif
997
998
                           sf->x_step_q4, sf->y_step_q4, xd);
}
999
#endif  // CONFIG_HIGHBITDEPTH
1000

Yaowu Xu's avatar
Yaowu Xu committed
1001
1002
1003
void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                               int dst_stride, const MV *src_mv,
                               const struct scale_factors *sf, int w, int h,
1004
                               ConvolveParams *conv_params,
1005
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
1006