/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"

void vp9_setup_scale_factors_for_frame(struct scale_factors *scale,
                                       YV12_BUFFER_CONFIG *other,
                                       int this_w, int this_h) {
  int other_h = other->y_crop_height;
  int other_w = other->y_crop_width;

  scale->x_num = other_w;
  scale->x_den = this_w;
  scale->x_offset_q4 = 0;  // calculated per-mb
  scale->x_step_q4 = 16 * other_w / this_w;

  scale->y_num = other_h;
  scale->y_den = this_h;
  scale->y_offset_q4 = 0;  // calculated per-mb
  scale->y_step_q4 = 16 * other_h / this_h;
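  // For example, a 640x480 reference predicting a 320x240 frame gives
  // x_step_q4 = 16 * 640 / 320 = 32: the predictor advances two
  // reference pixels per output pixel in that direction.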

  if (scale->x_num == scale->x_den && scale->y_num == scale->y_den) {
    scale->scale_value_x = unscaled_value;
    scale->scale_value_y = unscaled_value;
    scale->set_scaled_offsets = set_offsets_without_scaling;
    scale->scale_motion_vector_q3_to_q4 =
        motion_vector_q3_to_q4_without_scaling;
    scale->scale_motion_vector_component_q4 =
        motion_vector_component_q4_without_scaling;
  } else {
    scale->scale_value_x = scale_value_x_with_scaling;
    scale->scale_value_y = scale_value_y_with_scaling;
    scale->set_scaled_offsets = set_offsets_with_scaling;
    scale->scale_motion_vector_q3_to_q4 =
        motion_vector_q3_to_q4_with_scaling;
    scale->scale_motion_vector_component_q4 =
        motion_vector_component_q4_with_scaling;
  }

  // TODO(agrange): Investigate the best choice of functions to use here
  // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what
  // to do at full-pel offsets. The current selection, where the filter is
  // applied in one direction only, and not at all for 0,0, seems to give the
  // best quality, but it may be worth trying an additional mode that does
  // do the filtering on full-pel.
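  // predict[x_has_subpel][y_has_subpel][weight] selects the convolver.
  // Weight index 0 overwrites the destination; nonzero indices blend with
  // what is already there (a plain average when implicit compound
  // weighting is off, 1/8..7/8 weights when it is on).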
#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
  if (scale->x_step_q4 == 16) {
    if (scale->y_step_q4 == 16) {
      // No scaling in either direction.
      scale->predict[0][0][0] = vp9_convolve_copy;
      scale->predict[0][0][1] = vp9_convolve_1by8;
      scale->predict[0][0][2] = vp9_convolve_qtr;
      scale->predict[0][0][3] = vp9_convolve_3by8;
      scale->predict[0][0][4] = vp9_convolve_avg;
      scale->predict[0][0][5] = vp9_convolve_5by8;
      scale->predict[0][0][6] = vp9_convolve_3qtr;
      scale->predict[0][0][7] = vp9_convolve_7by8;
      scale->predict[0][1][0] = vp9_convolve8_vert;
      scale->predict[0][1][1] = vp9_convolve8_1by8_vert;
      scale->predict[0][1][2] = vp9_convolve8_qtr_vert;
      scale->predict[0][1][3] = vp9_convolve8_3by8_vert;
      scale->predict[0][1][4] = vp9_convolve8_avg_vert;
      scale->predict[0][1][5] = vp9_convolve8_5by8_vert;
      scale->predict[0][1][6] = vp9_convolve8_3qtr_vert;
      scale->predict[0][1][7] = vp9_convolve8_7by8_vert;
      scale->predict[1][0][0] = vp9_convolve8_horiz;
      scale->predict[1][0][1] = vp9_convolve8_1by8_horiz;
      scale->predict[1][0][2] = vp9_convolve8_qtr_horiz;
      scale->predict[1][0][3] = vp9_convolve8_3by8_horiz;
      scale->predict[1][0][4] = vp9_convolve8_avg_horiz;
      scale->predict[1][0][5] = vp9_convolve8_5by8_horiz;
      scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz;
      scale->predict[1][0][7] = vp9_convolve8_7by8_horiz;
    } else {
      // No scaling in x direction. Must always scale in the y direction.
      scale->predict[0][0][0] = vp9_convolve8_vert;
      scale->predict[0][0][1] = vp9_convolve8_1by8_vert;
      scale->predict[0][0][2] = vp9_convolve8_qtr_vert;
      scale->predict[0][0][3] = vp9_convolve8_3by8_vert;
      scale->predict[0][0][4] = vp9_convolve8_avg_vert;
      scale->predict[0][0][5] = vp9_convolve8_5by8_vert;
      scale->predict[0][0][6] = vp9_convolve8_3qtr_vert;
      scale->predict[0][0][7] = vp9_convolve8_7by8_vert;
      scale->predict[0][1][0] = vp9_convolve8_vert;
      scale->predict[0][1][1] = vp9_convolve8_1by8_vert;
      scale->predict[0][1][2] = vp9_convolve8_qtr_vert;
      scale->predict[0][1][3] = vp9_convolve8_3by8_vert;
      scale->predict[0][1][4] = vp9_convolve8_avg_vert;
      scale->predict[0][1][5] = vp9_convolve8_5by8_vert;
      scale->predict[0][1][6] = vp9_convolve8_3qtr_vert;
      scale->predict[0][1][7] = vp9_convolve8_7by8_vert;
      scale->predict[1][0][0] = vp9_convolve8;
      scale->predict[1][0][1] = vp9_convolve8_1by8;
      scale->predict[1][0][2] = vp9_convolve8_qtr;
      scale->predict[1][0][3] = vp9_convolve8_3by8;
      scale->predict[1][0][4] = vp9_convolve8_avg;
      scale->predict[1][0][5] = vp9_convolve8_5by8;
      scale->predict[1][0][6] = vp9_convolve8_3qtr;
      scale->predict[1][0][7] = vp9_convolve8_7by8;
    }
  } else {
    if (scale->y_step_q4 == 16) {
      // No scaling in the y direction. Must always scale in the x direction.
      scale->predict[0][0][0] = vp9_convolve8_horiz;
      scale->predict[0][0][1] = vp9_convolve8_1by8_horiz;
      scale->predict[0][0][2] = vp9_convolve8_qtr_horiz;
      scale->predict[0][0][3] = vp9_convolve8_3by8_horiz;
      scale->predict[0][0][4] = vp9_convolve8_avg_horiz;
      scale->predict[0][0][5] = vp9_convolve8_5by8_horiz;
      scale->predict[0][0][6] = vp9_convolve8_3qtr_horiz;
      scale->predict[0][0][7] = vp9_convolve8_7by8_horiz;
      scale->predict[0][1][0] = vp9_convolve8;
      scale->predict[0][1][1] = vp9_convolve8_1by8;
      scale->predict[0][1][2] = vp9_convolve8_qtr;
      scale->predict[0][1][3] = vp9_convolve8_3by8;
      scale->predict[0][1][4] = vp9_convolve8_avg;
      scale->predict[0][1][5] = vp9_convolve8_5by8;
      scale->predict[0][1][6] = vp9_convolve8_3qtr;
      scale->predict[0][1][7] = vp9_convolve8_7by8;
      scale->predict[1][0][0] = vp9_convolve8_horiz;
      scale->predict[1][0][1] = vp9_convolve8_1by8_horiz;
      scale->predict[1][0][2] = vp9_convolve8_qtr_horiz;
      scale->predict[1][0][3] = vp9_convolve8_3by8_horiz;
      scale->predict[1][0][4] = vp9_convolve8_avg_horiz;
      scale->predict[1][0][5] = vp9_convolve8_5by8_horiz;
      scale->predict[1][0][6] = vp9_convolve8_3qtr_horiz;
      scale->predict[1][0][7] = vp9_convolve8_7by8_horiz;
    } else {
      // Must always scale in both directions.
      scale->predict[0][0][0] = vp9_convolve8;
      scale->predict[0][0][1] = vp9_convolve8_1by8;
      scale->predict[0][0][2] = vp9_convolve8_qtr;
      scale->predict[0][0][3] = vp9_convolve8_3by8;
      scale->predict[0][0][4] = vp9_convolve8_avg;
      scale->predict[0][0][5] = vp9_convolve8_5by8;
      scale->predict[0][0][6] = vp9_convolve8_3qtr;
      scale->predict[0][0][7] = vp9_convolve8_7by8;
      scale->predict[0][1][0] = vp9_convolve8;
      scale->predict[0][1][1] = vp9_convolve8_1by8;
      scale->predict[0][1][2] = vp9_convolve8_qtr;
      scale->predict[0][1][3] = vp9_convolve8_3by8;
      scale->predict[0][1][4] = vp9_convolve8_avg;
      scale->predict[0][1][5] = vp9_convolve8_5by8;
      scale->predict[0][1][6] = vp9_convolve8_3qtr;
      scale->predict[0][1][7] = vp9_convolve8_7by8;
      scale->predict[1][0][0] = vp9_convolve8;
      scale->predict[1][0][1] = vp9_convolve8_1by8;
      scale->predict[1][0][2] = vp9_convolve8_qtr;
      scale->predict[1][0][3] = vp9_convolve8_3by8;
      scale->predict[1][0][4] = vp9_convolve8_avg;
      scale->predict[1][0][5] = vp9_convolve8_5by8;
      scale->predict[1][0][6] = vp9_convolve8_3qtr;
      scale->predict[1][0][7] = vp9_convolve8_7by8;
    }
  }
  // 2D subpel motion always gets filtered in both directions
  scale->predict[1][1][0] = vp9_convolve8;
  scale->predict[1][1][1] = vp9_convolve8_1by8;
  scale->predict[1][1][2] = vp9_convolve8_qtr;
  scale->predict[1][1][3] = vp9_convolve8_3by8;
  scale->predict[1][1][4] = vp9_convolve8_avg;
  scale->predict[1][1][5] = vp9_convolve8_5by8;
  scale->predict[1][1][6] = vp9_convolve8_3qtr;
  scale->predict[1][1][7] = vp9_convolve8_7by8;
}
#else
  if (scale->x_step_q4 == 16) {
    if (scale->y_step_q4 == 16) {
      // No scaling in either direction.
      scale->predict[0][0][0] = vp9_convolve_copy;
      scale->predict[0][0][1] = vp9_convolve_avg;
      scale->predict[0][1][0] = vp9_convolve8_vert;
      scale->predict[0][1][1] = vp9_convolve8_avg_vert;
      scale->predict[1][0][0] = vp9_convolve8_horiz;
      scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
    } else {
      // No scaling in x direction. Must always scale in the y direction.
      scale->predict[0][0][0] = vp9_convolve8_vert;
      scale->predict[0][0][1] = vp9_convolve8_avg_vert;
      scale->predict[0][1][0] = vp9_convolve8_vert;
      scale->predict[0][1][1] = vp9_convolve8_avg_vert;
      scale->predict[1][0][0] = vp9_convolve8;
      scale->predict[1][0][1] = vp9_convolve8_avg;
    }
  } else {
    if (scale->y_step_q4 == 16) {
      // No scaling in the y direction. Must always scale in the x direction.
      scale->predict[0][0][0] = vp9_convolve8_horiz;
      scale->predict[0][0][1] = vp9_convolve8_avg_horiz;
      scale->predict[0][1][0] = vp9_convolve8;
      scale->predict[0][1][1] = vp9_convolve8_avg;
      scale->predict[1][0][0] = vp9_convolve8_horiz;
      scale->predict[1][0][1] = vp9_convolve8_avg_horiz;
    } else {
      // Must always scale in both directions.
      scale->predict[0][0][0] = vp9_convolve8;
      scale->predict[0][0][1] = vp9_convolve8_avg;
      scale->predict[0][1][0] = vp9_convolve8;
      scale->predict[0][1][1] = vp9_convolve8_avg;
      scale->predict[1][0][0] = vp9_convolve8;
      scale->predict[1][0][1] = vp9_convolve8_avg;
    }
  }
  // 2D subpel motion always gets filtered in both directions
  scale->predict[1][1][0] = vp9_convolve8;
  scale->predict[1][1][1] = vp9_convolve8_avg;
}
#endif

void vp9_setup_interp_filters(MACROBLOCKD *xd,
                              INTERPOLATIONFILTERTYPE mcomp_filter_type,
                              VP9_COMMON *cm) {
  if (xd->mode_info_context) {
    MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;

    set_scale_factors(xd,
                      mbmi->ref_frame - 1,
                      mbmi->second_ref_frame - 1,
                      cm->active_ref_scale);
  }

  switch (mcomp_filter_type) {
    case EIGHTTAP:
    case SWITCHABLE:
      xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_8;
      break;
    case EIGHTTAP_SMOOTH:
      xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_8lp;
      break;
    case EIGHTTAP_SHARP:
      xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_8s;
      break;
    case BILINEAR:
      xd->subpix.filter_x = xd->subpix.filter_y = vp9_bilinear_filters;
      break;
#if CONFIG_ENABLE_6TAP
    case SIXTAP:
      xd->subpix.filter_x = xd->subpix.filter_y = vp9_sub_pel_filters_6;
      break;
#endif
  }
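  // The assert below checks that the selected filter table is 256-byte
  // aligned, which the optimized convolution code is assumed to rely on.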
  assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0);
}

void vp9_copy_mem16x16_c(const uint8_t *src,
                         int src_stride,
                         uint8_t *dst,
                         int dst_stride) {
  int r;

  for (r = 0; r < 16; r++) {
#if !(CONFIG_FAST_UNALIGNED)
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
    dst[4] = src[4];
    dst[5] = src[5];
    dst[6] = src[6];
    dst[7] = src[7];
    dst[8] = src[8];
    dst[9] = src[9];
    dst[10] = src[10];
    dst[11] = src[11];
    dst[12] = src[12];
    dst[13] = src[13];
    dst[14] = src[14];
    dst[15] = src[15];
#else
    ((uint32_t *)dst)[0] = ((const uint32_t *)src)[0];
    ((uint32_t *)dst)[1] = ((const uint32_t *)src)[1];
    ((uint32_t *)dst)[2] = ((const uint32_t *)src)[2];
    ((uint32_t *)dst)[3] = ((const uint32_t *)src)[3];
#endif
    src += src_stride;
    dst += dst_stride;
  }
}

void vp9_copy_mem8x8_c(const uint8_t *src,
                       int src_stride,
                       uint8_t *dst,
                       int dst_stride) {
  int r;

  for (r = 0; r < 8; r++) {
#if !(CONFIG_FAST_UNALIGNED)
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
    dst[4] = src[4];
    dst[5] = src[5];
    dst[6] = src[6];
    dst[7] = src[7];
#else
    ((uint32_t *)dst)[0] = ((const uint32_t *)src)[0];
    ((uint32_t *)dst)[1] = ((const uint32_t *)src)[1];
#endif
    src += src_stride;
    dst += dst_stride;
  }
}

void vp9_copy_mem8x4_c(const uint8_t *src,
                       int src_stride,
                       uint8_t *dst,
                       int dst_stride) {
  int r;

  for (r = 0; r < 4; r++) {
#if !(CONFIG_FAST_UNALIGNED)
    dst[0] = src[0];
    dst[1] = src[1];
    dst[2] = src[2];
    dst[3] = src[3];
    dst[4] = src[4];
    dst[5] = src[5];
    dst[6] = src[6];
    dst[7] = src[7];
#else
    ((uint32_t *)dst)[0] = ((const uint32_t *)src)[0];
    ((uint32_t *)dst)[1] = ((const uint32_t *)src)[1];
#endif
    src += src_stride;
    dst += dst_stride;
  }
}

void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
                               uint8_t *dst, int dst_stride,
                               const int_mv *mv_q3,
                               const struct scale_factors *scale,
                               int w, int h, int weight,
                               const struct subpix_fn_table *subpix) {
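  // The q3 (1/8-pel) MV is converted to q4 (1/16-pel) units, rescaled if
  // the reference differs in size from this frame. The integer part
  // (>> 4) selects the source pixel; the fractional part (& 15) selects
  // the subpel filter phase. E.g. an unscaled mv_q3 of +5 (5/8 pel)
  // becomes +10 in q4: offset 0, phase 10.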
  int_mv32 mv = scale->scale_motion_vector_q3_to_q4(mv_q3, scale);
  src += (mv.as_mv.row >> 4) * src_stride + (mv.as_mv.col >> 4);
  scale->predict[!!(mv.as_mv.col & 15)][!!(mv.as_mv.row & 15)][weight](
      src, src_stride, dst, dst_stride,
      subpix->filter_x[mv.as_mv.col & 15], scale->x_step_q4,
      subpix->filter_y[mv.as_mv.row & 15], scale->y_step_q4,
      w, h);
}

/* Like vp9_build_inter_predictor, but takes the full-pel part of the
 * mv separately, and the fractional part as a q4.
 */
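/* For example, with an unscaled reference: fullpel_mv_q3 = 16 (2 pels)
 * and frac_mv_q4 = 3 combine to mv_q4 = ((16 >> 3) << 4) + 3 = 35,
 * i.e. a 2-pel offset with subpel phase 3/16.
 */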
void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride,
                                  const int_mv *fullpel_mv_q3,
                                  const int_mv *frac_mv_q4,
                                  const struct scale_factors *scale,
                                  int w, int h, int weight,
                                  const struct subpix_fn_table *subpix) {
  const int mv_row_q4 = ((fullpel_mv_q3->as_mv.row >> 3) << 4)
                        + (frac_mv_q4->as_mv.row & 0xf);
  const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4)
                        + (frac_mv_q4->as_mv.col & 0xf);
  const int scaled_mv_row_q4 =
      scale->scale_motion_vector_component_q4(mv_row_q4, scale->y_num,
                                              scale->y_den, scale->y_offset_q4);
  const int scaled_mv_col_q4 =
      scale->scale_motion_vector_component_q4(mv_col_q4, scale->x_num,
                                              scale->x_den, scale->x_offset_q4);
  const int subpel_x = scaled_mv_col_q4 & 15;
  const int subpel_y = scaled_mv_row_q4 & 15;

  src += (scaled_mv_row_q4 >> 4) * src_stride + (scaled_mv_col_q4 >> 4);
  scale->predict[!!subpel_x][!!subpel_y][weight](
      src, src_stride, dst, dst_stride,
      subpix->filter_x[subpel_x], scale->x_step_q4,
      subpix->filter_y[subpel_y], scale->y_step_q4,
      w, h);
}

static void build_2x1_inter_predictor_wh(const BLOCKD *d0, const BLOCKD *d1,
                                         struct scale_factors *s,
                                         uint8_t *predictor,
                                         int block_size, int stride,
                                         int which_mv, int weight,
                                         int width, int height,
                                         const struct subpix_fn_table *subpix,
                                         int row, int col) {
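  // d0 and d1 are two horizontally adjacent sub-blocks. When both carry
  // the same MV the pair is predicted with one wider call; otherwise each
  // half is predicted separately.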
  struct scale_factors *scale = &s[which_mv];

  assert(d1->predictor - d0->predictor == block_size);
  assert(d1->pre == d0->pre + block_size);

  scale->set_scaled_offsets(scale, row, col);

  if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
    uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;

    vp9_build_inter_predictor(*base_pre + d0->pre,
                              d0->pre_stride,
                              predictor, stride,
                              &d0->bmi.as_mv[which_mv],
                              scale,
                              width, height,
                              weight, subpix);
  } else {
    uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
    uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre;

    vp9_build_inter_predictor(*base_pre0 + d0->pre,
                              d0->pre_stride,
                              predictor, stride,
                              &d0->bmi.as_mv[which_mv],
                              scale,
                              width > block_size ? block_size : width, height,
                              weight, subpix);

    if (width <= block_size) return;

    scale->set_scaled_offsets(scale, row, col + block_size);

    vp9_build_inter_predictor(*base_pre1 + d1->pre,
                              d1->pre_stride,
                              predictor + block_size, stride,
                              &d1->bmi.as_mv[which_mv],
                              scale,
                              width - block_size, height,
                              weight, subpix);
  }
}

static void build_2x1_inter_predictor(const BLOCKD *d0, const BLOCKD *d1,
                                      struct scale_factors *s,
                                      int block_size, int stride,
                                      int which_mv, int weight,
                                      const struct subpix_fn_table *subpix,
                                      int row, int col, int use_dst) {
  uint8_t *d0_predictor = use_dst ? *(d0->base_dst) + d0->dst : d0->predictor;
  uint8_t *d1_predictor = use_dst ? *(d1->base_dst) + d1->dst : d1->predictor;
  struct scale_factors *scale = &s[which_mv];
  stride = use_dst ? d0->dst_stride : stride;

  assert(d1_predictor - d0_predictor == block_size);
  assert(d1->pre == d0->pre + block_size);

  scale->set_scaled_offsets(scale, row, col);

  if (d0->bmi.as_mv[which_mv].as_int == d1->bmi.as_mv[which_mv].as_int) {
    uint8_t **base_pre = which_mv ? d0->base_second_pre : d0->base_pre;

    vp9_build_inter_predictor(*base_pre + d0->pre,
                              d0->pre_stride,
                              d0_predictor, stride,
                              &d0->bmi.as_mv[which_mv],
                              scale,
                              2 * block_size, block_size,
                              weight, subpix);
  } else {
    uint8_t **base_pre0 = which_mv ? d0->base_second_pre : d0->base_pre;
    uint8_t **base_pre1 = which_mv ? d1->base_second_pre : d1->base_pre;

    vp9_build_inter_predictor(*base_pre0 + d0->pre,
                              d0->pre_stride,
                              d0_predictor, stride,
                              &d0->bmi.as_mv[which_mv],
                              scale,
                              block_size, block_size,
                              weight, subpix);

    scale->set_scaled_offsets(scale, row, col + block_size);

    vp9_build_inter_predictor(*base_pre1 + d1->pre,
                              d1->pre_stride,
                              d1_predictor, stride,
                              &d1->bmi.as_mv[which_mv],
                              scale,
                              block_size, block_size,
                              weight, subpix);
  }
}

static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
  /* If the MV points so far into the UMV border that no visible pixels
   * are used for reconstruction, the subpel part of the MV can be
   * discarded and the MV limited to 16 pixels with equivalent results.
   *
   * This limit kicks in at 19 pixels for the top and left edges, for
   * the 16 pixels plus 3 taps right of the central pixel when subpel
   * filtering. The bottom and right edges use 16 pixels plus 2 pixels
   * left of the central pixel when filtering.
   */
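  /* Note the edges are in 1/8-pel units: a column MV more than
   * (16 + VP9_INTERP_EXTEND) << 3 beyond mb_to_left_edge is clamped to
   * exactly 16 full pels (16 << 3) past that edge.
   */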
  if (mv->col < (xd->mb_to_left_edge - ((16 + VP9_INTERP_EXTEND) << 3)))
    mv->col = xd->mb_to_left_edge - (16 << 3);
  else if (mv->col > xd->mb_to_right_edge + ((15 + VP9_INTERP_EXTEND) << 3))
    mv->col = xd->mb_to_right_edge + (16 << 3);

  if (mv->row < (xd->mb_to_top_edge - ((16 + VP9_INTERP_EXTEND) << 3)))
    mv->row = xd->mb_to_top_edge - (16 << 3);
  else if (mv->row > xd->mb_to_bottom_edge + ((15 + VP9_INTERP_EXTEND) << 3))
    mv->row = xd->mb_to_bottom_edge + (16 << 3);
}

/* A version of the above function for chroma block MVs. */
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) {
  const int extend = VP9_INTERP_EXTEND;

  mv->col = (2 * mv->col < (xd->mb_to_left_edge - ((16 + extend) << 3))) ?
            (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
  mv->col = (2 * mv->col > xd->mb_to_right_edge + ((15 + extend) << 3)) ?
            (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;

  mv->row = (2 * mv->row < (xd->mb_to_top_edge - ((16 + extend) << 3))) ?
            (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
  mv->row = (2 * mv->row > xd->mb_to_bottom_edge + ((15 + extend) << 3)) ?
            (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
}

#define AVERAGE_WEIGHT  (1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT))
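// AVERAGE_WEIGHT is 4 when implicit compound weighting is enabled (blend
// weights are in eighths, so 4 is an equal 4/8 + 4/8 average) and 1
// otherwise, which selects the plain _avg predictors.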

#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT

// Whether to use implicit weighting for UV
#define USE_IMPLICIT_WEIGHT_UV

// Whether to use implicit weighting for SplitMV
// #define USE_IMPLICIT_WEIGHT_SPLITMV

// #define SEARCH_MIN3
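// Measures how well a candidate prediction continues the already
// reconstructed pixels: the sum of absolute differences between the
// block's top row / left column in tmp_y and the neighboring
// reconstructed pixels just above / left of the block.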
static int64_t get_consistency_metric(MACROBLOCKD *xd,
                                      uint8_t *tmp_y, int tmp_ystride) {
  int block_size = 16 << xd->mode_info_context->mbmi.sb_type;
  uint8_t *rec_y = xd->dst.y_buffer;
  int rec_ystride = xd->dst.y_stride;
  int64_t metric = 0;
  int i;
  if (xd->up_available) {
    for (i = 0; i < block_size; ++i) {
      int diff = abs(*(rec_y - rec_ystride + i) -
                     *(tmp_y + i));
#ifdef SEARCH_MIN3
      // Searches for the min abs diff among 3 pixel neighbors in the border
      int diff1 = xd->left_available ?
          abs(*(rec_y - rec_ystride + i - 1) - *(tmp_y + i)) : diff;
      int diff2 = i < block_size - 1 ?
          abs(*(rec_y - rec_ystride + i + 1) - *(tmp_y + i)) : diff;
      diff = diff <= diff1 ? diff : diff1;
      diff = diff <= diff2 ? diff : diff2;
#endif
      metric += diff;
    }
  }
  if (xd->left_available) {
    for (i = 0; i < block_size; ++i) {
      int diff = abs(*(rec_y - 1 + i * rec_ystride) -
                     *(tmp_y + i * tmp_ystride));
#ifdef SEARCH_MIN3
      // Searches for the min abs diff among 3 pixel neighbors in the border
      int diff1 = xd->up_available ?
          abs(*(rec_y - 1 + (i - 1) * rec_ystride) -
                      *(tmp_y + i * tmp_ystride)) : diff;
      int diff2 = i < block_size - 1 ?
          abs(*(rec_y - 1 + (i + 1) * rec_ystride) -
              *(tmp_y + i * tmp_ystride)) : diff;
      diff = diff <= diff1 ? diff : diff1;
      diff = diff <= diff2 ? diff : diff2;
#endif
      metric += diff;
    }
  }
  return metric;
}

static int get_weight(MACROBLOCKD *xd, int64_t metric_1, int64_t metric_2) {
  int weight = AVERAGE_WEIGHT;
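  // metric_1/metric_2 are border-consistency scores (smaller is better).
  // The ratio tests below move the blend one or two eighths away from the
  // neutral average, toward whichever predictor matches the reconstructed
  // border more closely.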
  if (2 * metric_1 < metric_2)
    weight = 6;
  else if (4 * metric_1 < 3 * metric_2)
    weight = 5;
  else if (2 * metric_2 < metric_1)
    weight = 2;
  else if (4 * metric_2 < 3 * metric_1)
    weight = 3;
  return weight;
}

#ifdef USE_IMPLICIT_WEIGHT_SPLITMV
static int get_implicit_compoundinter_weight_splitmv(
    MACROBLOCKD *xd, int mb_row, int mb_col) {
  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
  BLOCKD *blockd = xd->block;
  const int use_second_ref = mbmi->second_ref_frame > 0;
  int64_t metric_2 = 0, metric_1 = 0;
  int i, which_mv, weight;
  uint8_t tmp_y[256];
  const int tmp_ystride = 16;

  if (!use_second_ref) return 0;
  if (!(xd->up_available || xd->left_available))
    return AVERAGE_WEIGHT;

  assert(xd->mode_info_context->mbmi.mode == SPLITMV);

  which_mv = 1;  // second predictor
  if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
    for (i = 0; i < 16; i += 8) {
      BLOCKD *d0 = &blockd[i];
      BLOCKD *d1 = &blockd[i + 2];
      const int y = i & 8;

      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
      blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];

      if (mbmi->need_to_clamp_mvs) {
        clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd);
        clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
      }
      if (i == 0) {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
                                     which_mv, 0, 16, 1,
                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
                                     which_mv, 0, 1, 8,
                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
      } else {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16,
                                     8, 16, which_mv, 0, 1, 8,
                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
      }
    }
  } else {
    for (i = 0; i < 16; i += 2) {
      BLOCKD *d0 = &blockd[i];
      BLOCKD *d1 = &blockd[i + 1];
      const int x = (i & 3) * 4;
      const int y = (i >> 2) * 4;

      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
      blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];

      if (i >= 4 && (i & 3) != 0) continue;

      if (i == 0) {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
                                     which_mv, 0, 8, 1, &xd->subpix,
                                     mb_row * 16 + y, mb_col * 16 + x);
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
                                     which_mv, 0, 1, 4, &xd->subpix,
                                     mb_row * 16 + y, mb_col * 16 + x);
      } else if (i < 4) {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16,
                                     which_mv, 0, 8, 1, &xd->subpix,
                                     mb_row * 16 + y, mb_col * 16 + x);
      } else {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16,
                                     4, 16, which_mv, 0, 1, 4, &xd->subpix,
                                     mb_row * 16 + y, mb_col * 16 + x);
      }
    }
  }
  metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride);

  which_mv = 0;  // first predictor
  if (xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4) {
    for (i = 0; i < 16; i += 8) {
      BLOCKD *d0 = &blockd[i];
      BLOCKD *d1 = &blockd[i + 2];
      const int y = i & 8;

      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
      blockd[i + 2].bmi = xd->mode_info_context->bmi[i + 2];

      if (mbmi->need_to_clamp_mvs) {
        clamp_mv_to_umv_border(&blockd[i + 0].bmi.as_mv[which_mv].as_mv, xd);
        clamp_mv_to_umv_border(&blockd[i + 2].bmi.as_mv[which_mv].as_mv, xd);
      }
      if (i == 0) {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
                                     which_mv, 0, 16, 1,
                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 8, 16,
                                     which_mv, 0, 1, 8,
                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
      } else {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + 8 * 16,
                                     8, 16, which_mv, 0, 1, 8,
                                     &xd->subpix, mb_row * 16 + y, mb_col * 16);
      }
    }
  } else {
    for (i = 0; i < 16; i += 2) {
      BLOCKD *d0 = &blockd[i];
      BLOCKD *d1 = &blockd[i + 1];
      const int x = (i & 3) * 4;
      const int y = (i >> 2) * 4;

      blockd[i + 0].bmi = xd->mode_info_context->bmi[i + 0];
      blockd[i + 1].bmi = xd->mode_info_context->bmi[i + 1];

      if (i >= 4 && (i & 3) != 0) continue;

      if (i == 0) {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
                                     which_mv, 0, 8, 1, &xd->subpix,
                                     mb_row * 16 + y, mb_col * 16 + x);
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y, 4, 16,
                                     which_mv, 0, 1, 4, &xd->subpix,
                                     mb_row * 16 + y, mb_col * 16 + x);
      } else if (i < 4) {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + x, 4, 16,
                                     which_mv, 0, 8, 1, &xd->subpix,
                                     mb_row * 16 + y, mb_col * 16 + x);
      } else {
        build_2x1_inter_predictor_wh(d0, d1, xd->scale_factor, tmp_y + y * 16,
                                     4, 16, which_mv, 0, 1, 4, &xd->subpix,
                                     mb_row * 16 + y, mb_col * 16 + x);
      }
    }
  }
  metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride);

  // Choose final weight for averaging
  weight = get_weight(xd, metric_1, metric_2);
  return weight;
}
#endif

static int get_implicit_compoundinter_weight(MACROBLOCKD *xd,
                                             int mb_row,
                                             int mb_col) {
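  // Builds just a one-pixel-wide top row and left column of each of the
  // two predictors, scores how well each strip continues the already
  // reconstructed border, and maps the two scores to a blend weight.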
  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
  int64_t metric_2 = 0, metric_1 = 0;
  int n, clamp_mvs, pre_stride;
  uint8_t *base_pre;
  int_mv ymv;
  uint8_t tmp_y[4096];
  const int tmp_ystride = 64;
  int weight;
  int edge[4];
  int block_size = 16 << xd->mode_info_context->mbmi.sb_type;
  struct scale_factors *scale;

  if (!use_second_ref) return 0;
  if (!(xd->up_available || xd->left_available))
    return AVERAGE_WEIGHT;

  edge[0] = xd->mb_to_top_edge;
  edge[1] = xd->mb_to_bottom_edge;
  edge[2] = xd->mb_to_left_edge;
  edge[3] = xd->mb_to_right_edge;

  clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_secondmv;
  base_pre = xd->second_pre.y_buffer;
  pre_stride = xd->second_pre.y_stride;
  ymv.as_int = xd->mode_info_context->mbmi.mv[1].as_int;
  // First generate the second predictor
  scale = &xd->scale_factor[1];
  for (n = 0; n < block_size; n += 16) {
    xd->mb_to_left_edge   = edge[2] - (n << 3);
    xd->mb_to_right_edge  = edge[3] + ((16 - n) << 3);
    if (clamp_mvs)
      clamp_mv_to_umv_border(&ymv.as_mv, xd);
    scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16 + n);
    // predict a single row of pixels
    vp9_build_inter_predictor(base_pre +
        scaled_buffer_offset(n, 0, pre_stride, scale),
        pre_stride, tmp_y + n, tmp_ystride, &ymv, scale, 16, 1, 0, &xd->subpix);
  }
  xd->mb_to_left_edge = edge[2];
  xd->mb_to_right_edge = edge[3];
  for (n = 0; n < block_size; n += 16) {
    xd->mb_to_top_edge    = edge[0] - (n << 3);
    xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3);
    if (clamp_mvs)
      clamp_mv_to_umv_border(&ymv.as_mv, xd);
    scale->set_scaled_offsets(scale, mb_row * 16 + n, mb_col * 16);
    // predict a single col of pixels
    vp9_build_inter_predictor(base_pre +
        scaled_buffer_offset(0, n, pre_stride, scale),
        pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv,
        scale, 1, 16, 0, &xd->subpix);
  }
  xd->mb_to_top_edge = edge[0];
  xd->mb_to_bottom_edge = edge[1];
  // Compute consistency metric
  metric_2 = get_consistency_metric(xd, tmp_y, tmp_ystride);

  clamp_mvs = xd->mode_info_context->mbmi.need_to_clamp_mvs;
  base_pre = xd->pre.y_buffer;
  pre_stride = xd->pre.y_stride;
  ymv.as_int = xd->mode_info_context->mbmi.mv[0].as_int;
  // Now generate the first predictor
  scale = &xd->scale_factor[0];
  for (n = 0; n < block_size; n += 16) {
    xd->mb_to_left_edge   = edge[2] - (n << 3);
    xd->mb_to_right_edge  = edge[3] + ((16 - n) << 3);
    if (clamp_mvs)
      clamp_mv_to_umv_border(&ymv.as_mv, xd);
    scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16 + n);
    // predict a single row of pixels
    vp9_build_inter_predictor(base_pre +
        scaled_buffer_offset(n, 0, pre_stride, scale),
        pre_stride, tmp_y + n, tmp_ystride, &ymv, scale, 16, 1, 0, &xd->subpix);
  }
  xd->mb_to_left_edge = edge[2];
  xd->mb_to_right_edge = edge[3];
  for (n = 0; n < block_size; n += 16) {
    xd->mb_to_top_edge    = edge[0] - (n << 3);
    xd->mb_to_bottom_edge = edge[1] + ((16 - n) << 3);
    if (clamp_mvs)
      clamp_mv_to_umv_border(&ymv.as_mv, xd);
    scale->set_scaled_offsets(scale, mb_row * 16 + n, mb_col * 16);
    // predict a single col of pixels
    vp9_build_inter_predictor(base_pre +
        scaled_buffer_offset(0, n, pre_stride, scale),
        pre_stride, tmp_y + n * tmp_ystride, tmp_ystride, &ymv,
        scale, 1, 16, 0, &xd->subpix);
  }
  xd->mb_to_top_edge = edge[0];
  xd->mb_to_bottom_edge = edge[1];
  metric_1 = get_consistency_metric(xd, tmp_y, tmp_ystride);

  // Choose final weight for averaging
  weight = get_weight(xd, metric_1, metric_2);
  return weight;
}

static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd,
                                              uint8_t *dst_y,
                                              int dst_ystride,
                                              int weight,
                                              int mb_row,
                                              int mb_col) {
  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
  int which_mv;

  for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
    const int clamp_mvs = which_mv ?
        xd->mode_info_context->mbmi.need_to_clamp_secondmv :
        xd->mode_info_context->mbmi.need_to_clamp_mvs;

    uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer;
    int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride;
    int_mv ymv;
    struct scale_factors *scale = &xd->scale_factor[which_mv];

    ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;

    if (clamp_mvs)
      clamp_mv_to_umv_border(&ymv.as_mv, xd);

    scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);

    vp9_build_inter_predictor(base_pre, pre_stride, dst_y, dst_ystride,
                              &ymv, scale, 16, 16,
                              which_mv ? weight : 0, &xd->subpix);
  }
}

void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
                                         uint8_t *dst_y,
                                         int dst_ystride,
                                         int mb_row,
                                         int mb_col) {
  int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);

  build_inter16x16_predictors_mby_w(xd, dst_y, dst_ystride, weight,
                                    mb_row, mb_col);
}

#else

void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd,
                                         uint8_t *dst_y,
                                         int dst_ystride,
                                         int mb_row,
                                         int mb_col) {
  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
  int which_mv;

  for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
    const int clamp_mvs = which_mv ?
         xd->mode_info_context->mbmi.need_to_clamp_secondmv :
         xd->mode_info_context->mbmi.need_to_clamp_mvs;

    uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer;
    int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride;
    int_mv ymv;
    struct scale_factors *scale = &xd->scale_factor[which_mv];

    ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;

    if (clamp_mvs)
      clamp_mv_to_umv_border(&ymv.as_mv, xd);

    scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);

    vp9_build_inter_predictor(base_pre, pre_stride, dst_y, dst_ystride,
                              &ymv, scale, 16, 16, which_mv, &xd->subpix);
  }
}
#endif

#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
                                               uint8_t *dst_u,
                                               uint8_t *dst_v,
                                               int dst_uvstride,
                                               int weight,
                                               int mb_row,
                                               int mb_col) {
  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
  int which_mv;

  for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
    const int clamp_mvs =
        which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
                 : xd->mode_info_context->mbmi.need_to_clamp_mvs;
    uint8_t *uptr, *vptr;
    int pre_stride = which_mv ? xd->second_pre.uv_stride
                              : xd->pre.uv_stride;
    int_mv _o16x16mv;
    int_mv _16x16mv;

    struct scale_factors *scale = &xd->scale_factor_uv[which_mv];

    _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;

    if (clamp_mvs)
      clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);

    _o16x16mv = _16x16mv;
    /* calc uv motion vectors */
    if (_16x16mv.as_mv.row < 0)
      _16x16mv.as_mv.row -= 1;
    else
      _16x16mv.as_mv.row += 1;

    if (_16x16mv.as_mv.col < 0)
      _16x16mv.as_mv.col -= 1;
    else
      _16x16mv.as_mv.col += 1;

    _16x16mv.as_mv.row /= 2;
    _16x16mv.as_mv.col /= 2;
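    // The +/-1 adjustment before halving rounds the luma MV away from
    // zero, e.g. a row of +5 maps to +3 and -5 to -3 rather than the
    // truncated +2 / -2.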

    uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
    vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);

    scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);

    vp9_build_inter_predictor_q4(
        uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
        scale, 8, 8, which_mv ? weight : 0, &xd->subpix);

    vp9_build_inter_predictor_q4(
        vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
        scale, 8, 8, which_mv ? weight : 0, &xd->subpix);
  }
}

void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
                                          uint8_t *dst_u,
                                          uint8_t *dst_v,
                                          int dst_uvstride,
                                          int mb_row,
                                          int mb_col) {
#ifdef USE_IMPLICIT_WEIGHT_UV
  int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col);
#else
  int weight = AVERAGE_WEIGHT;
#endif
  build_inter16x16_predictors_mbuv_w(xd, dst_u, dst_v, dst_uvstride,
                                     weight, mb_row, mb_col);
}

#else

void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
                                          uint8_t *dst_u,
                                          uint8_t *dst_v,
                                          int dst_uvstride,
                                          int mb_row,
                                          int mb_col) {
  const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0;
  int which_mv;

  for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
    const int clamp_mvs =
        which_mv ? xd->mode_info_context->mbmi.need_to_clamp_secondmv
                 : xd->mode_info_context->mbmi.need_to_clamp_mvs;
    uint8_t *uptr, *vptr;
    int pre_stride = which_mv ? xd->second_pre.uv_stride
                              : xd->pre.uv_stride;
    int_mv _o16x16mv;
    int_mv _16x16mv;

    struct scale_factors *scale = &xd->scale_factor_uv[which_mv];

    _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;

    if (clamp_mvs)
      clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);

    _o16x16mv = _16x16mv;
    /* calc uv motion vectors */
    if (_16x16mv.as_mv.row < 0)
      _16x16mv.as_mv.row -= 1;
    else
      _16x16mv.as_mv.row += 1;

    if (_16x16mv.as_mv.col < 0)
      _16x16mv.as_mv.col -= 1;
    else
      _16x16mv.as_mv.col += 1;

    _16x16mv.as_mv.row /= 2;
    _16x16mv.as_mv.col /= 2;

    uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
    vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);

    scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);

    vp9_build_inter_predictor_q4(
        uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
        scale, 8, 8,
        which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);

    vp9_build_inter_predictor_q4(
        vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
        scale, 8, 8,
        which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
  }
}
#endif

#if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT
static void build_inter_predictors_sby_w(MACROBLOCKD *x,
                                         uint8_t *dst_y,
                                         int dst_ystride,
                                         int weight,
                                         int mb_row,
                                         int mb_col,
                                         BLOCK_SIZE_TYPE bsize) {
  const int bwl = mb_width_log2(bsize),  bw = 1 << bwl;
  const int bhl = mb_height_log2(bsize), bh = 1 << bhl;
  uint8_t *y1 = x->pre.y_buffer;
  uint8_t *y2 = x->second_pre.y_buffer;
  int edge[4], n;

  edge[0] = x->mb_to_top_edge;
  edge[1] = x->mb_to_bottom_edge;
  edge[2] = x->mb_to_left_edge;
  edge[3] = x->mb_to_right_edge;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
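    // The mb_to_* edges below are in 1/8-pel units, hence the extra << 3
    // on top of the 16-pel macroblock offsets.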

    x->mb_to_top_edge    = edge[0] -           ((y_idx  * 16) << 3);
    x->mb_to_bottom_edge = edge[1] + (((bh - 1 - y_idx) * 16) << 3);
    x->mb_to_left_edge   = edge[2] -           ((x_idx  * 16) << 3);
    x->mb_to_right_edge  = edge[3] + (((bw - 1 - x_idx) * 16) << 3);

    x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16,
                                                y_idx * 16,
                                                x->pre.y_stride,
                                                &x->scale_factor[0]);
    if (x->mode_info_context->mbmi.second_ref_frame > 0) {
      x->second_pre.y_buffer = y2 +
          scaled_buffer_offset(x_idx * 16,
                               y_idx * 16,
                               x->second_pre.y_stride,
                               &x->scale_factor[1]);
    }
    build_inter16x16_predictors_mby_w(x,
        dst_y + y_idx * 16 * dst_ystride  + x_idx * 16,
        dst_ystride, weight, mb_row + y_idx, mb_col + x_idx);
  }
  x->mb_to_top_edge    = edge[0];
  x->mb_to_bottom_edge = edge[1];
  x->mb_to_left_edge   = edge[2];
  x->mb_to_right_edge  = edge[3];

  x->pre.y_buffer = y1;
  if (x->mode_info_context->mbmi.second_ref_frame > 0) {
    x->second_pre.y_buffer = y2;
  }
}

void vp9_build_inter_predictors_sby(MACROBLOCKD *x,
                                    uint8_t *dst_y,
                                    int dst_ystride,
                                    int mb_row,
                                    int mb_col,
                                    BLOCK_SIZE_TYPE bsize) {