reconinter.c 113 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12
 */

#include <assert.h>
13
#include <stdio.h>
14
#include <limits.h>
Jingning Han's avatar
Jingning Han committed
15

Yaowu Xu's avatar
Yaowu Xu committed
16 17 18
#include "./aom_scale_rtcd.h"
#include "./aom_dsp_rtcd.h"
#include "./aom_config.h"
Jingning Han's avatar
Jingning Han committed
19

Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom/aom_integer.h"
21
#include "aom_dsp/blend.h"
Jingning Han's avatar
Jingning Han committed
22

23 24 25 26
#include "av1/common/blockd.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/onyxc_int.h"
27
#include "av1/common/obmc.h"
Jingning Han's avatar
Jingning Han committed
28

29 30 31 32 33 34 35 36 37 38
// This function will determine whether or not to create a warped
// prediction and return the appropriate motion model depending
// on the configuration. Behavior will change with different
// combinations of GLOBAL_MOTION, WARPED_MOTION and MOTION_VAR.
// Returns 1 and fills *final_warp_params when a warp should be applied,
// otherwise returns 0 (leaving *final_warp_params as the identity model).
static INLINE int allow_warp(const MODE_INFO *const mi,
                             const WarpTypesAllowed *const warp_types,
                             const WarpedMotionParams *const gm_params,
                             int build_for_obmc,
                             WarpedMotionParams *final_warp_params) {
  const MB_MODE_INFO *const mbmi = &mi->mbmi;
  // Start from the identity model; overwritten only if a warp is chosen.
  *final_warp_params = default_warp_params;

  // A block whose fitted warp model was marked invalid never warps.
  if (mbmi->wm_params[0].invalid) return 0;

  // Motion var and global motion configured
  // Motion var and warped motion configured
  // Motion var, warped motion and global motion all configured
  if (warp_types->local_warp_allowed) {
    // Prefer the block's own (local) warped-motion model. When this
    // prediction is being built for an OBMC neighbour, the local model is
    // only used if WARP_WM_NEIGHBORS_WITH_OBMC is enabled.
    if ((build_for_obmc && WARP_WM_NEIGHBORS_WITH_OBMC) || (!build_for_obmc)) {
      memcpy(final_warp_params, &mbmi->wm_params[0],
             sizeof(*final_warp_params));
      return 1;
    }
  } else if (warp_types->global_warp_allowed &&
             (WARP_GM_NEIGHBORS_WITH_OBMC || !build_for_obmc)) {
    // Fall back to the frame-level global motion model, subject to the
    // analogous OBMC-neighbour gate (WARP_GM_NEIGHBORS_WITH_OBMC).
    memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
    return 1;
  }

  return 0;
}

// Builds the w x h inter prediction for one plane of one reference into
// dst. Dispatches, in priority order, to:
//   1) a warped prediction (av1_warp_plane) when allow_warp() selects a
//      local or global warp model and the block is at least 8x8,
//   2) a high-bitdepth translational prediction, or
//   3) the regular 8-bit translational prediction.
static INLINE void av1_make_inter_predictor(
    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
    const int subpel_x, const int subpel_y, const struct scale_factors *sf,
    int w, int h, ConvolveParams *conv_params, InterpFilters interp_filters,
    const WarpTypesAllowed *warp_types, int p_col, int p_row, int plane,
    int ref, const MODE_INFO *mi, int build_for_obmc, int xs, int ys,
    const MACROBLOCKD *xd) {
  (void)xd;

  // Make sure the selected motion mode is valid for this configuration
  assert_motion_mode_valid(mi->mbmi.motion_mode, 0, xd->global_motion, xd, mi);

  WarpedMotionParams final_warp_params;
  // Warping is only considered for blocks of at least 8x8 pixels.
  const int do_warp =
      (w >= 8 && h >= 8 &&
       allow_warp(mi, warp_types,
#if CONFIG_COMPOUND_SINGLEREF
                  // TODO(zoeliu): To further check the single
                  // ref comp mode to work together with
                  //               global motion.
                  has_second_ref(&mi->mbmi)
                      ? &xd->global_motion[mi->mbmi.ref_frame[ref]]
                      : &xd->global_motion[mi->mbmi.ref_frame[0]],
#else   // !(CONFIG_COMPOUND_SINGLEREF)
                  &xd->global_motion[mi->mbmi.ref_frame[ref]],
#endif  // CONFIG_COMPOUND_SINGLEREF
                  build_for_obmc, &final_warp_params));
  if (do_warp
#if CONFIG_AMVR
      // Warping is suppressed when the frame forces integer-precision MVs.
      && xd->cur_frame_force_integer_mv == 0
#endif
      ) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const struct buf_2d *const pre_buf = &pd->pre[ref];
    av1_warp_plane(&final_warp_params,
#if CONFIG_HIGHBITDEPTH
                   xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
#endif  // CONFIG_HIGHBITDEPTH
                   pre_buf->buf0, pre_buf->width, pre_buf->height,
                   pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
                   pd->subsampling_x, pd->subsampling_y, xs, ys, conv_params);
    return;
  }
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
                           sf, w, h, conv_params, interp_filters, xs, ys,
                           xd->bd);
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
                  h, conv_params, interp_filters, xs, ys);
}

clang-format's avatar
clang-format committed
118
#define NSMOOTHERS 1

// Master wedge masks of blending weights, one full
// MASK_MASTER_SIZE x MASK_MASTER_SIZE plane per entry.
// [smoother][negative][direction]
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
                              [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// Per-block-size, per-wedge sign flips; filled in by init_wedge_signs().
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);

// Backing storage for the contiguous per-block-size wedge masks packed by
// init_wedge_masks().
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Pointers into wedge_mask_buf; populated by init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
134

135
// Some unused wedge codebooks left temporarily to facilitate experiments.
136 137
// To be removed when settled.
/*
138
static wedge_code_type wedge_codebook_8_hgtw[8] = {
clang-format's avatar
clang-format committed
139 140 141 142
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
143 144
};

145
static wedge_code_type wedge_codebook_8_hltw[8] = {
clang-format's avatar
clang-format committed
146 147 148 149
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
150 151
};

152
static wedge_code_type wedge_codebook_8_heqw[8] = {
clang-format's avatar
clang-format committed
153 154 155 156
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
157
};
158 159

static const wedge_code_type wedge_codebook_32_hgtw[32] = {
clang-format's avatar
clang-format committed
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
176 177
};

178
static const wedge_code_type wedge_codebook_32_hltw[32] = {
clang-format's avatar
clang-format committed
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
195 196
};

197
static const wedge_code_type wedge_codebook_32_heqw[32] = {
clang-format's avatar
clang-format committed
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 1 },  { WEDGE_OBLIQUE27, 4, 2 },
  { WEDGE_OBLIQUE27, 4, 3 },  { WEDGE_OBLIQUE27, 4, 5 },
  { WEDGE_OBLIQUE27, 4, 6 },  { WEDGE_OBLIQUE27, 4, 7 },
  { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
  { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
  { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
  { WEDGE_OBLIQUE63, 1, 4 },  { WEDGE_OBLIQUE63, 2, 4 },
  { WEDGE_OBLIQUE63, 3, 4 },  { WEDGE_OBLIQUE63, 5, 4 },
  { WEDGE_OBLIQUE63, 6, 4 },  { WEDGE_OBLIQUE63, 7, 4 },
  { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
  { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
  { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
214
};
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
*/

// 16-entry wedge codebook used for blocks taller than wide ("hgtw").
// Each entry appears to be { direction, x_offset, y_offset } with offsets
// in units of 1/8 of the block dimension (see get_wedge_mask_inplace) —
// TODO(review): confirm field order against wedge_code_type.
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// 16-entry wedge codebook used for blocks wider than tall ("hltw");
// same layout as wedge_codebook_16_hgtw.
static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// 16-entry wedge codebook used for square blocks ("heqw");
// same layout as wedge_codebook_16_hgtw.
static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
249

250
// Per-block-size wedge configuration. Entries appear to be
// { bits, codebook, signflip, smoother, masks } — matches the member usage
// in get_wedge_mask_inplace()/init_wedge_signs(); confirm against the
// wedge_params_type declaration. bits == 0 means wedges are disabled for
// that block size (1 << bits gives the number of wedge types otherwise).
const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
    wedge_masks[BLOCK_8X8] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
    wedge_masks[BLOCK_8X16] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
    wedge_masks[BLOCK_16X8] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
    wedge_masks[BLOCK_16X16] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
    wedge_masks[BLOCK_16X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
    wedge_masks[BLOCK_32X16] },
  { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
#if CONFIG_EXT_PARTITION
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
#endif  // CONFIG_EXT_PARTITION
  { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_4X16], 0,
    wedge_masks[BLOCK_4X16] },
  { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X4], 0,
    wedge_masks[BLOCK_16X4] },
  { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32], 0,
    wedge_masks[BLOCK_8X32] },
  { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8], 0,
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
#if CONFIG_EXT_PARTITION
  { 0, NULL, NULL, 0, NULL },
  { 0, NULL, NULL, 0, NULL },
#endif  // CONFIG_EXT_PARTITION
};
294

clang-format's avatar
clang-format committed
295
// Returns a pointer directly into the master wedge mask (no copy): the
// codebook entry's x/y offsets select a bw x bh window around the centre
// of the MASK_MASTER_SIZE master plane, whose rows are
// MASK_MASTER_STRIDE apart.
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const uint8_t *master;
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *a =
      wedge_params_lookup[sb_type].codebook + wedge_index;
  const int smoother = wedge_params_lookup[sb_type].smoother;
  int woff, hoff;
  // XOR with the per-wedge sign flip keeps the wedge sign semantics
  // consistent across block sizes (see init_wedge_signs()).
  const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 &&
         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
  // Codebook offsets are expressed in eighths of the block dimensions.
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] +
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
  return master;
}

Yaowu Xu's avatar
Yaowu Xu committed
316 317 318
// Returns a pointer to the requested wedge mask, displaced so that index
// (offset_x, offset_y) of the caller's coordinate system lands on the
// mask origin. Returns NULL when no mask exists for this block size.
const uint8_t *av1_get_soft_mask(int wedge_index, int wedge_sign,
                                 BLOCK_SIZE sb_type, int offset_x,
                                 int offset_y) {
  const uint8_t *base =
      get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
  if (base == NULL) return NULL;
  return base - (offset_x + offset_y * MASK_MASTER_STRIDE);
}

325 326 327 328 329 330 331 332 333 334 335 336 337
// Writes the complement of `mask` (w x h, row pitch `stride`) into
// mask_inv_buffer and returns that buffer; each output sample is
// AOM_BLEND_A64_MAX_ALPHA minus the corresponding input sample.
static uint8_t *invert_mask(uint8_t *mask_inv_buffer, const uint8_t *const mask,
                            int h, int w, int stride) {
  for (int r = 0; r < h; ++r) {
    const uint8_t *const src_row = mask + r * stride;
    uint8_t *const dst_row = mask_inv_buffer + r * stride;
    for (int c = 0; c < w; ++c) {
      dst_row[c] = AOM_BLEND_A64_MAX_ALPHA - src_row[c];
    }
  }
  return mask_inv_buffer;
}

// Returns the complement of the block's compound-type blending mask.
// For COMPOUND_WEDGE the pre-built opposite-sign wedge mask is returned
// directly; for COMPOUND_SEG the complement is computed into mask_buffer.
const uint8_t *av1_get_compound_type_mask_inverse(
    const INTERINTER_COMPOUND_DATA *const comp_data, uint8_t *mask_buffer,
    int h, int w, int stride, BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->interinter_compound_type));
  (void)sb_type;
  switch (comp_data->interinter_compound_type) {
    case COMPOUND_WEDGE:
      // Inverting a wedge mask is just flipping its sign.
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          !comp_data->wedge_sign, sb_type);
    case COMPOUND_SEG:
      return invert_mask(mask_buffer, comp_data->seg_mask, h, w, stride);
    default: assert(0); return NULL;
  }
}
351

352 353
// Returns the blending mask for the block's masked compound prediction:
// the contiguous wedge mask for COMPOUND_WEDGE, or the per-block
// segmentation mask for COMPOUND_SEG.
const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->interinter_compound_type));
  (void)sb_type;
  switch (comp_data->interinter_compound_type) {
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
    case COMPOUND_SEG: return comp_data->seg_mask;
    default: assert(0); return NULL;
  }
}

365 366 367
#if COMPOUND_SEGMENT_TYPE == 0
// Fills the top-left w x h region of `mask` (row pitch = the block's
// full width) with a constant weight: mask_val, or its complement
// AOM_BLEND_A64_MAX_ALPHA - mask_val when which_inverse is set.
static void uniform_mask(uint8_t *mask, int which_inverse, BLOCK_SIZE sb_type,
                         int h, int w, int mask_val) {
  const int block_stride = block_size_wide[sb_type];
  const int value =
      which_inverse ? AOM_BLEND_A64_MAX_ALPHA - mask_val : mask_val;
  for (int r = 0; r < h; ++r) {
    uint8_t *const row = mask + r * block_stride;
    for (int c = 0; c < w; ++c) row[c] = (uint8_t)value;
  }
}

// COMPOUND_SEGMENT_TYPE == 0 variant: the "segmentation" mask is a
// uniform weight of 45 (or its complement); the source pixels are unused.
void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
                             const uint8_t *src0, int src0_stride,
                             const uint8_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w) {
  (void)src0;
  (void)src1;
  (void)src0_stride;
  (void)src1_stride;
  switch (mask_type) {
    case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
    case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
    default: assert(0);
  }
}
391

392
#if CONFIG_HIGHBITDEPTH
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
// High-bitdepth counterpart of build_compound_seg_mask for
// COMPOUND_SEGMENT_TYPE == 0: identical uniform fill, since the source
// pixels (and hence bit depth) are ignored.
void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                    const uint8_t *src0, int src0_stride,
                                    const uint8_t *src1, int src1_stride,
                                    BLOCK_SIZE sb_type, int h, int w, int bd) {
  (void)src0;
  (void)src1;
  (void)src0_stride;
  (void)src1_stride;
  (void)bd;
  switch (mask_type) {
    case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
    case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
    default: assert(0);
  }
}
408
#endif  // CONFIG_HIGHBITDEPTH
409 410 411

#elif COMPOUND_SEGMENT_TYPE == 1
#define DIFF_FACTOR 16
412 413 414 415 416 417

#if CONFIG_CONVOLVE_ROUND
// Builds a difference-weighted mask from two 32-bit intermediate
// (pre-rounding) convolution buffers: the larger the pixel difference,
// the larger the weight, clamped to [0, AOM_BLEND_A64_MAX_ALPHA].
static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
                             const int32_t *src0, int src0_stride,
                             const int32_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w,
                             ConvolveParams *conv_params, int bd) {
  // Undo the convolution's intermediate precision so the difference is
  // comparable to an 8-bit pixel difference.
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  int i, j, m, diff;
  int block_stride = block_size_wide[sb_type];
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * block_stride + j] =
          which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

// Dispatches DIFFWTD mask construction for 32-bit intermediate buffers;
// the _INV variant produces the complementary mask. Base weight is 38.
static void build_compound_seg_mask_d32(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                        const int32_t *src0, int src0_stride,
                                        const int32_t *src1, int src1_stride,
                                        BLOCK_SIZE sb_type, int h, int w,
                                        ConvolveParams *conv_params, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride,
                       sb_type, h, w, conv_params, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d32(mask, 1, 38, src0, src0_stride, src1, src1_stride,
                       sb_type, h, w, conv_params, bd);
      break;
    default: assert(0);
  }
}
451
#endif  // CONFIG_CONVOLVE_ROUND
452

453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474
// Builds a difference-weighted mask from two 8-bit predictors: each
// output weight grows with the local pixel difference, starting from
// mask_base and clamped to [0, AOM_BLEND_A64_MAX_ALPHA]. The mask row
// pitch is the block's full width.
static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
                         const uint8_t *src0, int src0_stride,
                         const uint8_t *src1, int src1_stride,
                         BLOCK_SIZE sb_type, int h, int w) {
  const int block_stride = block_size_wide[sb_type];
  for (int r = 0; r < h; ++r) {
    for (int c = 0; c < w; ++c) {
      const int diff =
          abs((int)src0[r * src0_stride + c] - (int)src1[r * src1_stride + c]);
      const int m =
          clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[r * block_stride + c] =
          (uint8_t)(which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m);
    }
  }
}

// COMPOUND_SEGMENT_TYPE == 1 variant: builds the DIFFWTD mask (base
// weight 38) from the two 8-bit predictors; _INV yields the complement.
void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
                             const uint8_t *src0, int src0_stride,
                             const uint8_t *src1, int src1_stride,
                             BLOCK_SIZE sb_type, int h, int w) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, sb_type,
                   h, w);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, sb_type,
                   h, w);
      break;
    default: assert(0);
  }
}

487
#if CONFIG_HIGHBITDEPTH
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
// High-bitdepth difference-weighted mask: like diffwtd_mask, but the
// 16-bit pixel difference is first scaled down by (bd - 8) bits so the
// weighting matches the 8-bit path.
static void diffwtd_mask_highbd(uint8_t *mask, int which_inverse, int mask_base,
                                const uint16_t *src0, int src0_stride,
                                const uint16_t *src1, int src1_stride,
                                BLOCK_SIZE sb_type, int h, int w, int bd) {
  int i, j, m, diff;
  int block_stride = block_size_wide[sb_type];
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs((int)src0[i * src0_stride + j] -
                 (int)src1[i * src1_stride + j]) >>
             (bd - 8);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * block_stride + j] =
          which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

// High-bitdepth DIFFWTD dispatch (COMPOUND_SEGMENT_TYPE == 1): src
// pointers are CONVERT_TO_SHORTPTR'd 16-bit buffers; base weight is 38.
void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
                                    const uint8_t *src0, int src0_stride,
                                    const uint8_t *src1, int src1_stride,
                                    BLOCK_SIZE sb_type, int h, int w, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
                          bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
                          bd);
      break;
    default: assert(0);
  }
}
524
#endif  // CONFIG_HIGHBITDEPTH
525
#endif  // COMPOUND_SEGMENT_TYPE
526

527 528 529 530
#if MASK_MASTER_SIZE == 64
// One-dimensional master profiles (per smoother) used when
// MASK_MASTER_SIZE == 64: each row ramps from 0 to 64 around the mask
// centre. init_wedge_master_masks() builds the 2-D oblique master by
// writing even/odd rows from these profiles with a decreasing shift.
static const uint8_t wedge_master_oblique_odd[NSMOOTHERS][MASK_MASTER_SIZE] = {
  {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
      37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  }
};
static const uint8_t wedge_master_oblique_even[NSMOOTHERS][MASK_MASTER_SIZE] = {
  {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
      46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  }
};
// Every row of the vertical master uses this same profile unshifted.
static const uint8_t wedge_master_vertical[NSMOOTHERS][MASK_MASTER_SIZE] = { {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
    43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
} };

// Copies `width` bytes from src into dst displaced horizontally by
// `shift` samples (positive = right, negative = left); the vacated end
// is padded by replicating the nearest edge sample of src.
// Requires |shift| < width.
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
  if (shift < 0) {
    const int left = -shift;
    memcpy(dst, src + left, width - left);
    memset(dst + width - left, src[width - 1], left);
  } else {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  }
}
#else
562
// Width of the tanh ramp used to synthesize the master masks when
// MASK_MASTER_SIZE != 64 (see init_wedge_master_masks()).
static const double smoother_param[NSMOOTHERS] = { 3.0 };
563 564
#endif  // MASK_MASTER_SIZE == 64

565
// Builds the MASK_MASTER_SIZE x MASK_MASTER_SIZE master wedge masks for
// every smoother. Only WEDGE_OBLIQUE63 and WEDGE_VERTICAL are generated
// directly; the other directions are derived by transposing / mirroring
// them. Index [0] holds the master mask, index [1] its complement
// (the two weights at each sample sum to 1 << WEDGE_WEIGHT_BITS).
static void init_wedge_master_masks() {
  int i, j, s;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  for (s = 0; s < NSMOOTHERS; s++) {
// Note: index [0] stores the masters, and [1] its complement.
#if MASK_MASTER_SIZE == 64
    // Generate prototype by shifting the masters: alternating even/odd
    // row profiles with a shift that decreases by one every two rows
    // produces the oblique (63 degree) edge.
    int shift = h / 4;
    for (i = 0; i < h; i += 2) {
      shift_copy(wedge_master_oblique_even[s],
                 &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride], shift,
                 MASK_MASTER_SIZE);
      shift--;
      shift_copy(wedge_master_oblique_odd[s],
                 &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][(i + 1) * stride],
                 shift, MASK_MASTER_SIZE);
      // Every row of the vertical master is the same unshifted profile.
      memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride],
             wedge_master_vertical[s],
             MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
      memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][(i + 1) * stride],
             wedge_master_vertical[s],
             MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
    }
#else
    // Synthesize the masters analytically with a tanh ramp across the
    // line a[0]*x + a[1]*y = 0 through the mask centre.
    const int a[2] = { 2, 1 };
    const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
    for (i = 0; i < h; i++) {
      for (j = 0; j < w; ++j) {
        int x = (2 * j + 1 - w);
        int y = (2 * i + 1 - h);
        double d = (a[0] * x + a[1] * y) / asqrt;
        const int msk = (int)rint((1.0 + tanh(d / smoother_param[s])) * 32);
        wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = msk;
        const int mskx = (int)rint((1.0 + tanh(x / smoother_param[s])) * 32);
        wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = mskx;
      }
    }
#endif  // MASK_MASTER_SIZE == 64
    // Derive the remaining directions and all complements:
    //   OBLIQUE27  = transpose of OBLIQUE63
    //   OBLIQUE117 = horizontal mirror, complemented
    //   OBLIQUE153 = transpose of OBLIQUE117
    //   HORIZONTAL = transpose of VERTICAL
    for (i = 0; i < h; ++i) {
      for (j = 0; j < w; ++j) {
        const int msk = wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j];
        wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = msk;
        wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
            wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - msk;
        wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
            wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - msk;
        wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
            wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
                msk;
        const int mskx = wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j];
        wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
        wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
            wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
                (1 << WEDGE_WEIGHT_BITS) - mskx;
      }
    }
  }
}

// If the signs for the wedges for various blocksizes are
// inconsistent flip the sign flag. Do it only once for every
// wedge codebook.
static void init_wedge_signs() {
  BLOCK_SIZE sb_type;
  memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
  for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
    const int bw = block_size_wide[sb_type];
    const int bh = block_size_high[sb_type];
    const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
    const int wbits = wedge_params.bits;
    const int wtypes = 1 << wbits;
    int i, w;
    // Skip block sizes with no wedge codebook.
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      // Get the mask master, i.e. index [0]
      const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
      // Average the mask over its top row and left column only — a cheap
      // proxy for the whole-mask average.
      int avg = 0;
      for (i = 0; i < bw; ++i) avg += mask[i];
      for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
      avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
      // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
      // If default sign is 1:
      //   If sign requested is 0, we need to flip the sign and return
      //   the complement i.e. index [1] instead. If sign requested is 1
      //   we need to flip the sign and return index [0] instead.
      // If default sign is 0:
      //   If sign requested is 0, we need to return index [0] the master
      //   if sign requested is 1, we need to return the complement index [1]
      //   instead.
      wedge_params.signflip[w] = (avg < 32);
    }
  }
}

static void init_wedge_masks() {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
668
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
669
    const uint8_t *mask;
670 671
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
672 673 674 675 676 677 678
    const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
    const int wbits = wedge_params->bits;
    const int wtypes = 1 << wbits;
    int w;
    if (wbits == 0) continue;
    for (w = 0; w < wtypes; ++w) {
      mask = get_wedge_mask_inplace(w, 0, bsize);
Yaowu Xu's avatar
Yaowu Xu committed
679
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
clang-format's avatar
clang-format committed
680
                        bh);
681 682 683 684
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      mask = get_wedge_mask_inplace(w, 1, bsize);
Yaowu Xu's avatar
Yaowu Xu committed
685
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
clang-format's avatar
clang-format committed
686
                        bh);
687 688 689 690 691 692 693 694
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}

// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// Public entry point: build the master wedge masks, resolve their sign
// flips, then repack them into per-block-size tables. Call once at init.
void av1_init_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_signs();
  init_wedge_masks();
}

#if CONFIG_CONVOLVE_ROUND
// Blend two 32-bit (pre-rounding) predictions under the compound mask.
static void build_masked_compound_no_round(
    CONV_BUF_TYPE *dst, int dst_stride, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w) {
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  aom_blend_a64_d32_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
                         mask, block_size_wide[sb_type], h, w, subh, subw);
}
#endif  // CONFIG_CONVOLVE_ROUND
716 717 718 719 720
static void build_masked_compound(
    uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
    const uint8_t *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w) {
721 722 723 724
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
725
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
Yaowu Xu's avatar
Yaowu Xu committed
726
  aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
727
                     mask, block_size_wide[sb_type], h, w, subh, subw);
728 729
}

#if CONFIG_HIGHBITDEPTH
// High bit-depth blend of two predictions under the compound mask.
// dst_8/src0_8/src1_8 are byte pointers aliasing 16-bit sample buffers
// (callers wrap them with CONVERT_TO_BYTEPTR); bd is the bit depth.
static void build_masked_compound_highbd(
    uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
    const uint8_t *src1_8, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, int bd) {
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
  const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
                            src1_stride, mask, block_size_wide[sb_type], h, w,
                            subh, subw, bd);
}
#endif  // CONFIG_HIGHBITDEPTH
748

749 750 751 752
void av1_make_masked_inter_predictor(
    const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
    const int subpel_x, const int subpel_y, const struct scale_factors *sf,
    int w, int h, ConvolveParams *conv_params, InterpFilters interp_filters,
753 754
    int xs, int ys, int plane, const WarpTypesAllowed *warp_types, int p_col,
    int p_row, int ref, MACROBLOCKD *xd) {
755 756
  const MODE_INFO *mi = xd->mi[0];

757
  const INTERINTER_COMPOUND_DATA comp_data = {
758
    mi->mbmi.wedge_index, mi->mbmi.wedge_sign, mi->mbmi.mask_type, xd->seg_mask,
759 760
    mi->mbmi.interinter_compound_type
  };
761

762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780
// We're going to call av1_make_inter_predictor to generate a prediction into
// a temporary buffer, then will blend that temporary buffer with that from
// the other reference.
//
// With CONFIG_CONVOLVE_ROUND, if the rounding mode is CONVOLVE_OPT_NO_ROUND
// then the predictions are at 32-bits, so we'll need 32 bits per
// pixel. Otherwise, we'll need up to 16 bits per pixel if
// CONFIG_HIGHBITDEPTH or just 8 otherwise.
#if CONFIG_CONVOLVE_ROUND
#define INTER_PRED_BYTES_PER_PIXEL 4
#elif CONFIG_HIGHBITDEPTH
#define INTER_PRED_BYTES_PER_PIXEL 2
#else
#define INTER_PRED_BYTES_PER_PIXEL 1
#endif
  DECLARE_ALIGNED(16, uint8_t,
                  tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
#undef INTER_PRED_BYTES_PER_PIXEL

781 782
#if CONFIG_HIGHBITDEPTH
  uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
783 784
                         ? CONVERT_TO_BYTEPTR(tmp_buf)
                         : tmp_buf;
785
#else
786
  uint8_t *tmp_dst = tmp_buf;
787 788 789
#endif

#if CONFIG_CONVOLVE_ROUND
790
  const int tmp_buf_stride = MAX_SB_SIZE;
791
  const int is_conv_no_round = conv_params->round == CONVOLVE_OPT_NO_ROUND;
792 793
  CONV_BUF_TYPE *org_dst = conv_params->dst;
  int org_dst_stride = conv_params->dst_stride;
794
  CONV_BUF_TYPE *tmp_buf32 = (CONV_BUF_TYPE *)tmp_buf;
795
  if (is_conv_no_round) {
796 797 798
    conv_params->dst = tmp_buf32;
    conv_params->dst_stride = tmp_buf_stride;
    assert(conv_params->do_average == 0);
799 800
  }
#endif  // CONFIG_CONVOLVE_ROUND
801

802
  // This will generate a prediction in tmp_buf for the second reference
Yaowu Xu's avatar
Yaowu Xu committed
803
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
804
                           subpel_y, sf, w, h, conv_params, interp_filters,
805 806
                           warp_types, p_col, p_row, plane, ref, mi, 0, xs, ys,
                           xd);
807

808
  if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
809
#if CONFIG_CONVOLVE_ROUND
810
    if (is_conv_no_round) {
Yi Luo's avatar
Yi Luo committed
811 812 813 814
      build_compound_seg_mask_d32(comp_data.seg_mask, comp_data.mask_type,
                                  org_dst, org_dst_stride, tmp_buf32,
                                  tmp_buf_stride, mi->mbmi.sb_type, h, w,
                                  conv_params, xd->bd);
815
    } else {
816
#endif  // CONFIG_CONVOLVE_ROUND
817
#if CONFIG_HIGHBITDEPTH
818 819 820
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
                                       dst, dst_stride, tmp_dst, MAX_SB_SIZE,
Yi Luo's avatar
Yi Luo committed
821
                                       mi->mbmi.sb_type, h, w, xd->bd);
822
      } else {
823
#endif
824 825 826
        build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
                                dst_stride, tmp_dst, MAX_SB_SIZE,
                                mi->mbmi.sb_type, h, w);
827
#if CONFIG_HIGHBITDEPTH
828
      }
829
#endif
830
#if CONFIG_CONVOLVE_ROUND
831
    }
832
#endif
833
  }
834

835
#if CONFIG_CONVOLVE_ROUND
836
  if (is_conv_no_round) {
837
    build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
838
                                   org_dst_stride, tmp_buf32, tmp_buf_stride,
839
                                   &comp_data, mi->mbmi.sb_type, h, w);
840 841 842

    const int convolve_rounding_bits =
        FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
843 844
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
845 846
      av1_highbd_convolve_rounding(org_dst, org_dst_stride, dst, dst_stride, w,
                                   h, convolve_rounding_bits, xd->bd);
847 848
    else
#endif
849 850
      av1_convolve_rounding(org_dst, org_dst_stride, dst, dst_stride, w, h,
                            convolve_rounding_bits);
851

852 853
    conv_params->do_post_rounding = 0;
  } else {
854
#endif  // CONFIG_CONVOLVE_ROUND
855 856 857

#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
858 859 860
      build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
                                   MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
                                   w, xd->bd);
861 862
    else
#endif  // CONFIG_HIGHBITDEPTH
863 864 865 866 867
      build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
                            MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
  }
#endif  // CONFIG_CONVOLVE_ROUND
868
}
869

870 871 872
// TODO(sarahparker) av1_highbd_build_inter_predictor and
// av1_build_inter_predictor should be combined with
// av1_make_inter_predictor
873
#if CONFIG_HIGHBITDEPTH
874 875 876
void av1_highbd_build_inter_predictor(
    const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
    const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
877 878
    InterpFilters interp_filters, const WarpTypesAllowed *warp_types, int p_col,
    int p_row, int plane, enum mv_precision precision, int x, int y,
879
    const MACROBLOCKD *xd) {
Jingning Han's avatar
Jingning Han committed
880 881 882
  const int is_q4 = precision == MV_PRECISION_Q4;
  const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
                     is_q4 ? src_mv->col : src_mv->col * 2 };
Yaowu Xu's avatar
Yaowu Xu committed
883
  MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
Fergus Simpson's avatar
Fergus Simpson committed
884 885
  mv.col += SCALE_EXTRA_OFF;
  mv.row += SCALE_EXTRA_OFF;
886 887
  const int subpel_x = mv.col & SCALE_SUBPEL_MASK;
  const int subpel_y = mv.row & SCALE_SUBPEL_MASK;
888
  ConvolveParams conv_params = get_conv_params(ref, ref, plane);
Jingning Han's avatar
Jingning Han committed
889

890 891
  src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride +
         (mv.col >> SCALE_SUBPEL_BITS);
Jingning Han's avatar
Jingning Han committed
892

893
  av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,