mcomp.c 120 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
Yaowu Xu's avatar
Yaowu Xu committed
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Jingning Han's avatar
Jingning Han committed
3
 *
Yaowu Xu's avatar
Yaowu Xu committed
4 5 6 7 8 9
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Jingning Han's avatar
Jingning Han committed
10 11 12 13 14 15
 */

#include <limits.h>
#include <math.h>
#include <stdio.h>

Yaowu Xu's avatar
Yaowu Xu committed
16 17
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
18

Yaowu Xu's avatar
Yaowu Xu committed
19 20
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
21
#include "aom_ports/mem.h"
Jingning Han's avatar
Jingning Han committed
22

23
#include "av1/common/common.h"
Yunqing Wang's avatar
Yunqing Wang committed
24
#include "av1/common/mvref_common.h"
25
#include "av1/common/reconinter.h"
Jingning Han's avatar
Jingning Han committed
26

27 28 29
#include "av1/encoder/encoder.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/rdopt.h"
Jingning Han's avatar
Jingning Han committed
30 31 32 33 34 35 36 37

// #define NEW_DIAMOND_SEARCH

// Returns the address inside `buf` selected by `mv`, using the MV components
// directly as whole-sample row/column offsets from the buffer origin.
static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
                                             const MV *mv) {
  const int sample_offset = mv->row * buf->stride + mv->col;
  return buf->buf + sample_offset;
}

Alex Converse's avatar
Alex Converse committed
38
// Shrinks `mv_limits` to its intersection with the window of
// +/- MAX_FULL_PEL_VAL around `mv`. `mv` is treated as having 3 fractional
// bits; the resulting limits are in whole-sample units and are additionally
// kept strictly inside ((MV_LOW >> 3), (MV_UPP >> 3)).
void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  const int int_col = mv->col >> 3;
  const int int_row = mv->row >> 3;
  // A non-zero fractional part moves the lower bound up by one sample.
  const int col_has_frac = (mv->col & 7) ? 1 : 0;
  const int row_has_frac = (mv->row & 7) ? 1 : 0;

  const int col_lo = AOMMAX(int_col - MAX_FULL_PEL_VAL + col_has_frac,
                            (MV_LOW >> 3) + 1);
  const int row_lo = AOMMAX(int_row - MAX_FULL_PEL_VAL + row_has_frac,
                            (MV_LOW >> 3) + 1);
  const int col_hi = AOMMIN(int_col + MAX_FULL_PEL_VAL, (MV_UPP >> 3) - 1);
  const int row_hi = AOMMIN(int_row + MAX_FULL_PEL_VAL, (MV_UPP >> 3) - 1);

  // Intersect the UMV window with the valid MV window so the diamond search
  // has fewer bounds checks to perform.
  mv_limits->col_min = AOMMAX(mv_limits->col_min, col_lo);
  mv_limits->col_max = AOMMIN(mv_limits->col_max, col_hi);
  mv_limits->row_min = AOMMAX(mv_limits->row_min, row_lo);
  mv_limits->row_max = AOMMIN(mv_limits->row_max, row_hi);
}

57 58 59 60
static void av1_set_subpel_mv_search_range(const MvLimits *mv_limits,
                                           int *col_min, int *col_max,
                                           int *row_min, int *row_max,
                                           const MV *ref_mv) {
61
  const int max_mv = MAX_FULL_PEL_VAL * 8;
Alex Converse's avatar
Alex Converse committed
62 63 64 65
  const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
  const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
  const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
  const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);
66 67 68 69 70 71 72

  *col_min = AOMMAX(MV_LOW + 1, minc);
  *col_max = AOMMIN(MV_UPP - 1, maxc);
  *row_min = AOMMAX(MV_LOW + 1, minr);
  *row_max = AOMMIN(MV_UPP - 1, maxr);
}

Yaowu Xu's avatar
Yaowu Xu committed
73
int av1_init_search_range(int size) {
Jingning Han's avatar
Jingning Han committed
74 75
  int sr = 0;
  // Minimum search size no matter what the passed in value.
Yaowu Xu's avatar
Yaowu Xu committed
76
  size = AOMMAX(16, size);
Jingning Han's avatar
Jingning Han committed
77

78
  while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
Jingning Han's avatar
Jingning Han committed
79

Yaowu Xu's avatar
Yaowu Xu committed
80
  sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2);
Jingning Han's avatar
Jingning Han committed
81 82 83
  return sr;
}

84 85
static INLINE int mv_cost(const MV *mv, const int *joint_cost,
                          int *const comp_cost[2]) {
Yaowu Xu's avatar
Yaowu Xu committed
86
  return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
87
         comp_cost[1][mv->col];
Jingning Han's avatar
Jingning Han committed
88 89
}

Yaowu Xu's avatar
Yaowu Xu committed
90 91
int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
                    int *mvcost[2], int weight) {
92
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
93
  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
Jingning Han's avatar
Jingning Han committed
94 95
}

96 97 98
#define PIXEL_TRANSFORM_ERROR_SCALE 4
static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
                       int *mvcost[2], int error_per_bit) {
Jingning Han's avatar
Jingning Han committed
99
  if (mvcost) {
100
    const MV diff = { mv->row - ref->row, mv->col - ref->col };
101 102
    return (int)ROUND_POWER_OF_TWO_64(
        (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
Yaowu Xu's avatar
Yaowu Xu committed
103
        RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
104
            PIXEL_TRANSFORM_ERROR_SCALE);
Jingning Han's avatar
Jingning Han committed
105 106 107 108 109
  }
  return 0;
}

static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
110
                          int sad_per_bit) {
111
  const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
112
  return ROUND_POWER_OF_TWO(
113
      (unsigned)mv_cost(&diff, x->nmvjointcost, x->mvcost) * sad_per_bit,
Yaowu Xu's avatar
Yaowu Xu committed
114
      AV1_PROB_COST_SHIFT);
Jingning Han's avatar
Jingning Han committed
115 116
}

Yaowu Xu's avatar
Yaowu Xu committed
117
// Fills `cfg` with a 4-sites-per-step search pattern for a buffer of the
// given `stride`. Site 0 is the zero displacement; each following group of
// four holds the up/down/left/right displacements for one step size, starting
// at MAX_FIRST_STEP and halving until the step reaches 0. Each site stores
// both its MV and the matching buffer offset (row * stride + col).
void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  int step;
  int num_sites = 1;

  // Search centre.
  cfg->ss[0].mv.row = 0;
  cfg->ss[0].mv.col = 0;
  cfg->ss[0].offset = 0;

  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    const MV deltas[4] = {
      { -step, 0 }, { step, 0 }, { 0, -step }, { 0, step }
    };
    int k;
    for (k = 0; k < 4; ++k) {
      search_site *const site = &cfg->ss[num_sites + k];
      site->mv = deltas[k];
      site->offset = deltas[k].row * stride + deltas[k].col;
    }
    num_sites += 4;
  }

  cfg->ss_count = num_sites;
  cfg->searches_per_step = 4;
}

Yaowu Xu's avatar
Yaowu Xu committed
138
// Fills `cfg` with an 8-sites-per-step search pattern for a buffer of the
// given `stride`. Site 0 is the zero displacement; each following group of
// eight holds the four axis-aligned and four diagonal displacements for one
// step size, starting at MAX_FIRST_STEP and halving until the step reaches 0.
// Each site stores both its MV and the matching buffer offset
// (row * stride + col).
void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
  int step;
  int num_sites = 1;

  // Search centre.
  cfg->ss[0].mv.row = 0;
  cfg->ss[0].mv.col = 0;
  cfg->ss[0].offset = 0;

  for (step = MAX_FIRST_STEP; step > 0; step /= 2) {
    const MV deltas[8] = {
      { -step, 0 },     { step, 0 },     { 0, -step },    { 0, step },
      { -step, -step }, { -step, step }, { step, -step }, { step, step }
    };
    int k;
    for (k = 0; k < 8; ++k) {
      search_site *const site = &cfg->ss[num_sites + k];
      site->mv = deltas[k];
      site->offset = deltas[k].row * stride + deltas[k].col;
    }
    num_sites += 8;
  }

  cfg->ss_count = num_sites;
  cfg->searches_per_step = 8;
}

/*
 * To avoid the penalty of reads that cross a cache line, preload the reference
 * area into a small aligned buffer, so that no read from this buffer crosses a
 * cache line. This reduces the CPU cycles spent reading reference data in the
 * sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */

// convert motion vector component to offset for sv[a]f calc
172
static INLINE int sp(int x) { return x & 7; }
Jingning Han's avatar
Jingning Han committed
173 174 175 176 177 178

// Returns the address in `buf` of the sample at row (r >> 3), column
// (c >> 3), i.e. the position given by r and c with their three low bits
// dropped.
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  const int sample_row = r >> 3;
  const int sample_col = c >> 3;
  return buf + (sample_row * stride + sample_col);
}

/*
 * Checks whether the candidate (r, c) scores better than the previous best.
 *
 * When (r, c) lies inside [minr, maxr] x [minc, maxc], `v` is set to the MV
 * rate cost plus the prediction error returned by vfp->svf (no second
 * predictor), vfp->msvf (second predictor with mask) or vfp->svaf (second
 * predictor without mask). If that beats `besterr`, then besterr, br, bc,
 * *distortion and *sse1 are updated. Outside the range `v` becomes INT_MAX.
 *
 * The macro reads and writes locals of the expanding function (minc, maxc,
 * minr, maxr, ref_mv, mvjcost, mvcost, error_per_bit, second_pred, mask,
 * mask_stride, invert_mask, vfp, y, y_stride, src_address, src_stride, sse,
 * thismse, besterr, br, bc, distortion, sse1).
 *
 * `r` and `c` are expanded several times and must be free of side effects.
 * Parameters are parenthesized and the body is wrapped in do/while (0) so that
 * an invocation followed by ';' forms exactly one statement in any context.
 */
#define CHECK_BETTER(v, r, c) \
  do { \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) { \
      MV this_mv = { (r), (c) }; \
      (v) = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
      if (second_pred == NULL) \
        thismse = vfp->svf(pre(y, y_stride, (r), (c)), y_stride, sp(c), \
                           sp(r), src_address, src_stride, &sse); \
      else if (mask) \
        thismse = vfp->msvf(pre(y, y_stride, (r), (c)), y_stride, sp(c), \
                            sp(r), src_address, src_stride, second_pred, \
                            mask, mask_stride, invert_mask, &sse); \
      else \
        thismse = vfp->svaf(pre(y, y_stride, (r), (c)), y_stride, sp(c), \
                            sp(r), src_address, src_stride, &sse, \
                            second_pred); \
      (v) += thismse; \
      if ((v) < besterr) { \
        besterr = (v); \
        br = (r); \
        bc = (c); \
        *distortion = thismse; \
        *sse1 = sse; \
      } \
    } else { \
      (v) = INT_MAX; \
    } \
  } while (0)
Jingning Han's avatar
Jingning Han committed
204

205 206 207
/* CHECK_BETTER0 is an alias of CHECK_BETTER, selectable via CHECK_BETTER##k. */
#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)

/*
 * Checks whether the candidate (r, c) scores better than the previous best,
 * measuring the prediction error with upsampled_pref_error().
 *
 * When (r, c) lies inside [minr, maxr] x [minc, maxc], `v` is set to the MV
 * rate cost plus that error; if it beats `besterr`, then besterr, br, bc,
 * *distortion and *sse1 are updated. Outside the range `v` becomes INT_MAX.
 *
 * The macro relies on locals of the expanding function (xd, vfp, src_address,
 * src_stride, y, y_stride, second_pred, mask, mask_stride, invert_mask, w, h,
 * sse, thismse, ref_mv, mvjcost, mvcost, error_per_bit, besterr, br, bc,
 * distortion, sse1, minc, maxc, minr, maxr).
 *
 * `r` and `c` are expanded several times and must be free of side effects.
 * Parameters are parenthesized and the body is wrapped in do/while (0) so that
 * an invocation followed by ';' forms exactly one statement in any context.
 */
#define CHECK_BETTER1(v, r, c) \
  do { \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) { \
      MV this_mv = { (r), (c) }; \
      thismse = upsampled_pref_error( \
          xd, vfp, src_address, src_stride, pre(y, y_stride, (r), (c)), \
          y_stride, sp(c), sp(r), second_pred, mask, mask_stride, \
          invert_mask, w, h, &sse); \
      (v) = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
      (v) += thismse; \
      if ((v) < besterr) { \
        besterr = (v); \
        br = (r); \
        bc = (c); \
        *distortion = thismse; \
        *sse1 = sse; \
      } \
    } else { \
      (v) = INT_MAX; \
    } \
  } while (0)
227

228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
/*
 * Probes the four axis-aligned neighbours of (tr, tc) at distance `hstep`
 * (left, right, up, down), stores in `whichdir` which horizontal (bit 0) and
 * vertical (bit 1) neighbour scored lower, and then probes the single diagonal
 * neighbour in that quadrant.
 *
 * Uses tr, tc, hstep and whichdir from the expanding function, plus everything
 * CHECK_BETTER requires. Wrapped in do/while (0) so that an invocation
 * followed by ';' is a single statement in any context.
 */
#define FIRST_LEVEL_CHECKS \
  do { \
    unsigned int left, right, up, down, diag; \
    CHECK_BETTER(left, tr, tc - hstep); \
    CHECK_BETTER(right, tr, tc + hstep); \
    CHECK_BETTER(up, tr - hstep, tc); \
    CHECK_BETTER(down, tr + hstep, tc); \
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
    switch (whichdir) { \
      case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
      case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
      case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
      case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
      default: break; /* whichdir is always in 0..3 */ \
    } \
  } while (0)

/*
 * Follow-up probes after the best point (br, bc) has moved away from the step
 * centre (tr, tc). With kr = br - tr and kc = bc - tc:
 *  - both coordinates moved: probe (tr + kr, tc + 2kc) and (tr + 2kr, tc + kc);
 *  - only the column moved: probe (tr +/- hstep, tc + 2kc), then one of
 *    (tr +/- hstep, tc + kc) selected by `whichdir`;
 *  - only the row moved: probe (tr + 2kr, tc +/- hstep), then one of
 *    (tr + kr, tc +/- hstep) selected by `whichdir`.
 * Nothing is probed when the best point did not move.
 *
 * Uses tr, tc, br, bc, hstep and whichdir from the expanding function, plus
 * everything CHECK_BETTER requires. Wrapped in do/while (0) so that an
 * invocation followed by ';' is a single statement in any context.
 */
#define SECOND_LEVEL_CHECKS \
  do { \
    int kr, kc; \
    unsigned int second; \
    if (tr != br && tc != bc) { \
      kr = br - tr; \
      kc = bc - tc; \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
    } else if (tr == br && tc != bc) { \
      kc = bc - tc; \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
      switch (whichdir) { \
        case 0: \
        case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
        case 2: \
        case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
        default: break; /* whichdir is always in 0..3 */ \
      } \
    } else if (tr != br && tc == bc) { \
      kr = br - tr; \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
      switch (whichdir) { \
        case 0: \
        case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
        case 1: \
        case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
        default: break; /* whichdir is always in 0..3 */ \
      } \
    } \
  } while (0)

276 277 278
// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
// later in the same way.
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
/*
 * Follow-up probes around the current best point, using CHECK_BETTER<k>
 * (k is pasted onto the macro name, e.g. 0 or 1).
 *
 * Requires `kr` and `kc` to be declared by the expanding function: only the
 * component along which the best point moved away from (tr, tc) is refreshed
 * here; the other keeps the value the caller left in it. The best point must
 * share its row or its column with (tr, tc) (asserted).
 *
 * Probes (br0 + kr, bc0) and (br0, bc0 + kc), where (br0, bc0) is the best
 * point on entry, and, if either probe improved the best point, also probes
 * (br0 + kr, bc0 + kc).
 *
 * Wrapped in do/while (0) so that an invocation followed by ';' is a single
 * statement in any context.
 */
#define SECOND_LEVEL_CHECKS_BEST(k) \
  do { \
    unsigned int second; \
    int br0 = br; \
    int bc0 = bc; \
    assert(tr == br || tc == bc); \
    if (tr == br && tc != bc) { \
      kc = bc - tc; \
    } else if (tr != br && tc == bc) { \
      kr = br - tr; \
    } \
    CHECK_BETTER##k(second, br0 + kr, bc0); \
    CHECK_BETTER##k(second, br0, bc0 + kc); \
    if (br0 != br || bc0 != bc) { \
      CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
    } \
  } while (0)

Alex Converse's avatar
Alex Converse committed
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
/*
 * Declares and initialises the locals shared by the sub-pel search functions.
 * Expects `x`, `ref_mv` and `iters_per_step` to be in scope.
 *
 * `offset` and br/bc are derived from x->best_mv before it is modified; the
 * macro then multiplies x->best_mv by 8 in place. The search bounds
 * minc/maxc/minr/maxr come from av1_set_subpel_mv_search_range(), and the
 * step centre (tr, tc) starts at (br, bc) with hstep = 4.
 */
#define SETUP_SUBPEL_SEARCH \
  const MACROBLOCKD *xd = &x->e_mbd; \
  MV *bestmv = &x->best_mv.as_mv; \
  const uint8_t *const src_address = x->plane[0].src.buf; \
  const int src_stride = x->plane[0].src.stride; \
  const uint8_t *const y = xd->plane[0].pre[0].buf; \
  const int y_stride = xd->plane[0].pre[0].stride; \
  const int offset = bestmv->row * y_stride + bestmv->col; \
  const unsigned int halfiters = iters_per_step; \
  const unsigned int quarteriters = iters_per_step; \
  const unsigned int eighthiters = iters_per_step; \
  unsigned int besterr = INT_MAX; \
  unsigned int sse; \
  unsigned int whichdir; \
  int thismse; \
  int minc, maxc, minr, maxr; \
  int hstep = 4; \
  int br = bestmv->row * 8; \
  int bc = bestmv->col * 8; \
  int tr = br; \
  int tc = bc; \
  av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, \
                                 ref_mv); \
  bestmv->row *= 8; \
  bestmv->col *= 8;

326 327
static unsigned int setup_center_error(
    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
Yaowu Xu's avatar
Yaowu Xu committed
328
    int error_per_bit, const aom_variance_fn_ptr_t *vfp,
329
    const uint8_t *const src, const int src_stride, const uint8_t *const y,
330 331 332
    int y_stride, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
    int *mvcost[2], unsigned int *sse1, int *distortion) {
Jingning Han's avatar
Jingning Han committed
333
  unsigned int besterr;
334
#if CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
335 336
  if (second_pred != NULL) {
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
337
      DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
338 339 340 341 342 343
      if (mask)
        aom_highbd_comp_mask_pred(comp_pred16, second_pred, w, h, y + offset,
                                  y_stride, mask, mask_stride, invert_mask);
      else
        aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
                                 y_stride);
344 345
      besterr =
          vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
Jingning Han's avatar
Jingning Han committed
346
    } else {
347
      DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
348 349 350 351 352
      if (mask)
        aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
                           mask, mask_stride, invert_mask);
      else
        aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
Jingning Han's avatar
Jingning Han committed
353 354 355 356 357 358 359 360
      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
    }
  } else {
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  }
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#else
361
  (void)xd;
Jingning Han's avatar
Jingning Han committed
362
  if (second_pred != NULL) {
363
    DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
364 365 366 367 368
    if (mask)
      aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
                         mask, mask_stride, invert_mask);
    else
      aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
Jingning Han's avatar
Jingning Han committed
369 370 371 372 373 374
    besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  } else {
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  }
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
375
#endif  // CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
376 377 378
  return besterr;
}

379
// Returns n / d rounded to the nearest integer. Half of the divisor is
// subtracted from n when n and d differ in sign and added otherwise, before
// the truncating division.
static INLINE int divide_and_round(int n, int d) {
  const int half = d / 2;
  const int signs_differ = (n < 0) != (d < 0);
  const int biased = signs_differ ? (n - half) : (n + half);
  return biased / d;
}

// Returns 1 when cost_list[0] is strictly smaller than each of
// cost_list[1..4], and 0 otherwise.
static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  int i;
  for (i = 1; i <= 4; ++i) {
    if (cost_list[0] >= cost_list[i]) return 0;
  }
  return 1;
}

// Estimates the location of the cost-surface minimum, in units of 1/2^bits.
// The surface is modelled as S = A(x - x0)^2 + B(y - y0)^2 + C. Given the
// costs S0, S1, S2, S3, S4 at
// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
// the minimum (x0, y0) is:
//   x0 = 1/2 (S1 - S3) / (S1 + S3 - 2*S0),
//   y0 = 1/2 (S4 - S2) / (S4 + S2 - 2*S0).
// This is the integer version of those formulas; *ic receives x0 and *ir
// receives y0.
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  const int center = cost_list[0];
  const int left = cost_list[1];
  const int below = cost_list[2];
  const int right = cost_list[3];
  const int above = cost_list[4];
  // The factor 1/2 of the formula is folded into the scale.
  const int scale = 1 << (bits - 1);

  *ic = divide_and_round((left - right) * scale, (left - 2 * center + right));
  *ir = divide_and_round((above - below) * scale,
                         (above - 2 * center + below));
}

Yaowu Xu's avatar
Yaowu Xu committed
403
int av1_find_best_sub_pixel_tree_pruned_evenmore(
404
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
Yaowu Xu's avatar
Yaowu Xu committed
405
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
406
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
407 408
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
Jingning Han's avatar
Jingning Han committed
409
  SETUP_SUBPEL_SEARCH;
410 411 412 413
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
414 415 416 417 418 419 420 421 422 423
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;
  (void)use_upsampled_ref;

  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
Jingning Han's avatar
Jingning Han committed
424
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
425
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
Jingning Han's avatar
Jingning Han committed
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
    int ir, ic;
    unsigned int minpt;
    get_cost_surf_min(cost_list, &ir, &ic, 2);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    tr = br;
    tc = bc;

    // Each subsequent iteration checks at least one point in common with
    // the last iteration could be 2 ( if diag selected) 1/4 pel
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  }

  tr = br;
  tc = bc;

Alex Converse's avatar
Alex Converse committed
456
  if (allow_hp && forced_stop == 0) {
Jingning Han's avatar
Jingning Han committed
457 458 459 460 461 462 463 464 465 466 467 468 469
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

Yaowu Xu's avatar
Yaowu Xu committed
470
int av1_find_best_sub_pixel_tree_pruned_more(
471
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
Yaowu Xu's avatar
Yaowu Xu committed
472
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
473
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
474 475
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
Jingning Han's avatar
Jingning Han committed
476
  SETUP_SUBPEL_SEARCH;
477 478
  (void)use_upsampled_ref;

479 480 481 482
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
483
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
Jingning Han's avatar
Jingning Han committed
484
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
485
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
Jingning Han's avatar
Jingning Han committed
486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
    unsigned int minpt;
    int ir, ic;
    get_cost_surf_min(cost_list, &ir, &ic, 1);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

Alex Converse's avatar
Alex Converse committed
513
  if (allow_hp && forced_stop == 0) {
Jingning Han's avatar
Jingning Han committed
514 515 516 517 518 519 520 521 522 523
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
524 525
  (void)tr;
  (void)tc;
Jingning Han's avatar
Jingning Han committed
526 527 528 529 530 531 532

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

Yaowu Xu's avatar
Yaowu Xu committed
533
int av1_find_best_sub_pixel_tree_pruned(
534
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
Yaowu Xu's avatar
Yaowu Xu committed
535
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
536
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
537 538
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
Jingning Han's avatar
Jingning Han committed
539
  SETUP_SUBPEL_SEARCH;
540 541
  (void)use_upsampled_ref;

542 543 544 545
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                               src_address, src_stride, y, y_stride,
                               second_pred, mask, mask_stride, invert_mask, w,
                               h, offset, mvjcost, mvcost, sse1, distortion);
546
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
Jingning Han's avatar
Jingning Han committed
547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX) {
    unsigned int left, right, up, down, diag;
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
    switch (whichdir) {
      case 0:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
        break;
      case 1:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
        break;
      case 2:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
        break;
      case 3:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
        break;
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

Alex Converse's avatar
Alex Converse committed
598
  if (allow_hp && forced_stop == 0) {
Jingning Han's avatar
Jingning Han committed
599 600 601 602 603 604 605 606 607 608
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
609 610
  (void)tr;
  (void)tc;
Jingning Han's avatar
Jingning Han committed
611 612 613 614 615 616 617

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

618
/* clang-format off */
Jingning Han's avatar
Jingning Han committed
619
static const MV search_step_table[12] = {
620 621 622 623
  // left, right, up, down
  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
Jingning Han's avatar
Jingning Han committed
624
};
625
/* clang-format on */
Jingning Han's avatar
Jingning Han committed
626

627 628 629 630 631 632 633 634
static int upsampled_pref_error(const MACROBLOCKD *xd,
                                const aom_variance_fn_ptr_t *vfp,
                                const uint8_t *const src, const int src_stride,
                                const uint8_t *const y, int y_stride,
                                int subpel_x_q3, int subpel_y_q3,
                                const uint8_t *second_pred, const uint8_t *mask,
                                int mask_stride, int invert_mask, int w, int h,
                                unsigned int *sse) {
635
  unsigned int besterr;
636
#if CONFIG_HIGHBITDEPTH
637
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
638
    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
639 640
    if (second_pred != NULL) {
      if (mask)
641 642
        aom_highbd_comp_mask_upsampled_pred(
            pred16, second_pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride,
643
            mask, mask_stride, invert_mask, xd->bd);
644
      else
645 646
        aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h,
                                           subpel_x_q3, subpel_y_q3, y,
647
                                           y_stride, xd->bd);
648
    } else {
649
      aom_highbd_upsampled_pred(pred16, w, h, subpel_x_q3, subpel_y_q3, y,
650
                                y_stride, xd->bd);
651
    }
652

653
    besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse);
654
  } else {
655
    DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
656
#else
657 658
  DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
  (void)xd;
659
#endif  // CONFIG_HIGHBITDEPTH
660 661
    if (second_pred != NULL) {
      if (mask)
662 663
        aom_comp_mask_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
                                     subpel_y_q3, y, y_stride, mask,
664 665
                                     mask_stride, invert_mask);
      else
666 667
        aom_comp_avg_upsampled_pred(pred, second_pred, w, h, subpel_x_q3,
                                    subpel_y_q3, y, y_stride);
668
    } else {
669
      aom_upsampled_pred(pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride);
670
    }
671 672

    besterr = vfp->vf(pred, w, src, src_stride, sse);
673
#if CONFIG_HIGHBITDEPTH
674 675
  }
#endif
676
  return besterr;
677 678 679 680
}

// Evaluates the search centre for the upsampled-reference path.
//
// Calls upsampled_pref_error() on y + offset with zero sub-pixel offsets,
// stores that raw error in *distortion (and its SSE in *sse1), then returns
// the error plus mv_err_cost(bestmv, ref_mv, ...).
static unsigned int upsampled_setup_center_error(
    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
    int error_per_bit, const aom_variance_fn_ptr_t *vfp,
    const uint8_t *const src, const int src_stride, const uint8_t *const y,
    int y_stride, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost,
    int *mvcost[2], unsigned int *sse1, int *distortion) {
  unsigned int besterr = upsampled_pref_error(
      xd, vfp, src, src_stride, y + offset, y_stride, 0, 0, second_pred, mask,
      mask_stride, invert_mask, w, h, sse1);
  // Report the distortion before the motion vector cost is folded in.
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  return besterr;
}

694 695 696 697
// Sub-pixel refinement of the full-pel motion vector in x->best_mv.
//
// On entry x->best_mv.as_mv is a full-pel vector; it is scaled by 8 and the
// search then runs `round` stages with step sizes 4, 2, 1 (taken from
// search_step_table). Each stage:
//   1. evaluates the left/right/up/down candidates around (br, bc);
//   2. evaluates one diagonal, chosen from the cheaper side on each axis;
//   3. moves the centre to the best candidate, if any beat the current best;
//   4. optionally (iters_per_step > 1) runs SECOND_LEVEL_CHECKS_BEST.
//
// round = 3 - forced_stop, and is capped at 2 when allow_hp is zero.
// Candidates outside the range from av1_set_subpel_mv_search_range() are
// skipped and given a cost of INT_MAX.
//
// Outputs: x->best_mv.as_mv (the refined vector), *distortion (error of the
// winner without MV cost) and *sse1. Returns the winner's error plus MV cost.
// cost_list is accepted for interface compatibility and is not used.
int av1_find_best_sub_pixel_tree(
    MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
    const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
    int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
    unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
    int mask_stride, int invert_mask, int w, int h, int use_upsampled_ref) {
  const uint8_t *const src_address = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int thismse;
  const int y_stride = xd->plane[0].pre[0].stride;
  MV *bestmv = &x->best_mv.as_mv;
  // Computed while bestmv is still in full-pel units.
  const int offset = bestmv->row * y_stride + bestmv->col;
  const uint8_t *const y = xd->plane[0].pre[0].buf;

  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int hstep = 4;
  int iter, round = 3 - forced_stop;
  int tr = br;
  int tc = bc;
  const MV *search_step = search_step_table;
  int idx, best_idx = -1;
  // [0..3] = left, right, up, down; [4] = diagonal.
  unsigned int cost_array[5];
  int kr, kc;
  int minc, maxc, minr, maxr;

  av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
                                 ref_mv);

  // Without high-precision MVs the final (step size 1) stage is skipped.
  if (!allow_hp && round == 3) round = 2;

  bestmv->row *= 8;
  bestmv->col *= 8;

  // use_upsampled_ref can be 0 or 1
  if (use_upsampled_ref)
    besterr = upsampled_setup_center_error(
        xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
        y_stride, second_pred, mask, mask_stride, invert_mask, w, h, offset,
        mvjcost, mvcost, sse1, distortion);
  else
    besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
                                 src_address, src_stride, y, y_stride,
                                 second_pred, mask, mask_stride, invert_mask, w,
                                 h, offset, mvjcost, mvcost, sse1, distortion);

  (void)cost_list;  // to silence compiler warning

  for (iter = 0; iter < round; ++iter) {
    // Check vertical and horizontal sub-pixel positions.
    for (idx = 0; idx < 4; ++idx) {
      tr = br + search_step[idx].row;
      tc = bc + search_step[idx].col;
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
        MV this_mv = { tr, tc };

        if (use_upsampled_ref) {
          thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
                                         pre(y, y_stride, tr, tc), y_stride,
                                         sp(tc), sp(tr), second_pred, mask,
                                         mask_stride, invert_mask, w, h, &sse);
        } else {
          const uint8_t *const pre_address = pre(y, y_stride, tr, tc);
          if (second_pred == NULL)
            thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                               src_address, src_stride, &sse);
          else if (mask)
            thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
                                src_address, src_stride, second_pred, mask,
                                mask_stride, invert_mask, &sse);
          else
            thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                                src_address, src_stride, &sse, second_pred);
        }

        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
                                                mvcost, error_per_bit);

        if (cost_array[idx] < besterr) {
          best_idx = idx;
          besterr = cost_array[idx];
          *distortion = thismse;
          *sse1 = sse;
        }
      } else {
        cost_array[idx] = INT_MAX;
      }
    }

    // Check diagonal sub-pixel position: on each axis step towards the
    // cheaper of the two candidates just evaluated.
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);

    tc = bc + kc;
    tr = br + kr;
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
      MV this_mv = { tr, tc };

      if (use_upsampled_ref) {
        thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
                                       pre(y, y_stride, tr, tc), y_stride,
                                       sp(tc), sp(tr), second_pred, mask,
                                       mask_stride, invert_mask, w, h, &sse);
      } else {
        const uint8_t *const pre_address = pre(y, y_stride, tr, tc);

        if (second_pred == NULL)
          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
                             src_stride, &sse);
        else if (mask)
          thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, second_pred, mask,
                              mask_stride, invert_mask, &sse);
        else
          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse, second_pred);
      }

      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
                                            error_per_bit);

      if (cost_array[4] < besterr) {
        best_idx = 4;
        besterr = cost_array[4];
        *distortion = thismse;
        *sse1 = sse;
      }
    } else {
      // Index the diagonal slot explicitly instead of relying on idx having
      // been left at 4 by the loop above.
      cost_array[4] = INT_MAX;
    }

    if (best_idx < 4 && best_idx >= 0) {
      br += search_step[best_idx].row;
      bc += search_step[best_idx].col;
    } else if (best_idx == 4) {
      // tr/tc still hold the diagonal candidate.
      br = tr;
      bc = tc;
    }

    if (iters_per_step > 1 && best_idx != -1) {
      if (use_upsampled_ref) {
        SECOND_LEVEL_CHECKS_BEST(1);
      } else {
        SECOND_LEVEL_CHECKS_BEST(0);
      }
    }

    // Advance to the next, finer stage.
    search_step += 4;
    hstep >>= 1;
    best_idx = -1;
  }

  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

#undef PRE
#undef CHECK_BETTER

864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888
#if CONFIG_WARPED_MOTION
// Cost of coding the current block with motion vector *this_mv.
//
// Rebuilds the luma inter predictor with av1_build_inter_predictors_sby(),
// measures vfp->vf() between xd->plane[0].dst and the source block, and adds
// mv_err_cost() of this_mv relative to the first reference MV of the block's
// first reference frame.
// NOTE(review): presumably the predictor is written to xd->plane[0].dst and
// is driven by the mode info already stored in xd->mi[0]; confirm.
unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
                                     BLOCK_SIZE bsize, int mi_row, int mi_col,
                                     const MV *this_mv) {
  MACROBLOCKD *const mbd = &x->e_mbd;
  const MB_MODE_INFO *const mode_info = &mbd->mi[0]->mbmi;
  const MV pred_mv = x->mbmi_ext->ref_mvs[mode_info->ref_frame[0]][0].as_mv;
  const aom_variance_fn_ptr_t *const var_fns = &cpi->fn_ptr[bsize];

  // Buffer pointers and strides are captured before the predictor is built.
  const uint8_t *const src_buf = x->plane[0].src.buf;
  const int src_pitch = x->plane[0].src.stride;
  uint8_t *const pred_buf = mbd->plane[0].dst.buf;
  const int pred_pitch = mbd->plane[0].dst.stride;

  unsigned int sse_unused;
  unsigned int distortion;

  av1_build_inter_predictors_sby(&cpi->common, mbd, mi_row, mi_col, NULL,
                                 bsize);
  distortion =
      var_fns->vf(pred_buf, pred_pitch, src_buf, src_pitch, &sse_unused);

  return distortion + mv_err_cost(this_mv, &pred_mv, x->nmvjointcost,
                                  x->mvcost, x->errorperbit);
}

// Refine MV in a small range
889 890 891 892 893 894
#if WARPED_MOTION_SORT_SAMPLES
unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int *pts0, int *pts_inref0, int *pts_mv0,
                                  int total_samples) {
#else
895 896 897
unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int *pts, int *pts_inref) {
898
#endif  // WARPED_MOTION_SORT_SAMPLES
899 900 901 902 903 904 905 906 907 908 909 910
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
                            { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } };
  const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
  int16_t br = mbmi->mv[0].as_mv.row;
  int16_t bc = mbmi->mv[0].as_mv.col;
  int16_t *tr = &mbmi->mv[0].as_mv.row;
  int16_t *tc = &mbmi->mv[0].as_mv.col;
  WarpedMotionParams best_wm_params = mbmi->wm_params[0];
911 912 913
#if WARPED_MOTION_SORT_SAMPLES
  int best_num_proj_ref = mbmi->num_proj_ref[0];
#endif  // WARPED_MOTION_SORT_SAMPLES
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939
  unsigned int bestmse;
  int minc, maxc, minr, maxr;
  const int start = cm->allow_high_precision_mv ? 0 : 4;
  int ite;

  av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
                                 &ref_mv);

  // Calculate the center position's error
  assert(bc >= minc && bc <= maxc && br >= minr && br <= maxr);
  bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col,
                                    &mbmi->mv[0].as_mv);

  // MV search
  for (ite = 0; ite < 2; ++ite) {
    int best_idx = -1;
    int idx;

    for (idx = start; idx < start + 4; ++idx) {
      unsigned int thismse;

      *tr = br + neighbors[idx].row;
      *tc = bc + neighbors[idx].col;

      if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) {
        MV this_mv = { *tr, *tc };
940 941 942 943 944 945 946 947 948 949
#if WARPED_MOTION_SORT_SAMPLES
        int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];

        memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
        memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
        if (total_samples > 1)
          mbmi->num_proj_ref[0] =
              sortSamples(pts_mv0, &this_mv, pts, pts_inref, total_samples);
#endif  // WARPED_MOTION_SORT_SAMPLES

950 951 952 953 954 955 956 957
        if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr,
                             *tc, &mbmi->wm_params[0], mi_row, mi_col)) {
          thismse =
              av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv);

          if (thismse < bestmse) {
            best_idx = idx;
            best_wm_params = mbmi->wm_params[0];
958 959 960
#if WARPED_MOTION_SORT_SAMPLES
            best_num_proj_ref = mbmi->num_proj_ref[0];
#endif  // WARPED_MOTION_SORT_SAMPLES
961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977
            bestmse = thismse;
          }
        }
      }
    }

    if (best_idx == -1) break;

    if (best_idx >= 0) {
      br += neighbors[best_idx].row;
      bc += neighbors[best_idx].col;
    }
  }

  *tr = br;
  *tc = bc;
  mbmi->wm_params[0] = best_wm_params;
978 979 980
#if WARPED_MOTION_SORT_SAMPLES
  mbmi->num_proj_ref[0] = best_num_proj_ref;
#endif  // WARPED_MOTION_SORT_SAMPLES
981 982 983 984
  return bestmse;
}
#endif  // CONFIG_WARPED_MOTION

Alex Converse's avatar
Alex Converse committed
985
static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
Jingning Han's avatar
Jingning Han committed
986
                               int range) {
Alex Converse's avatar
Alex Converse committed
987 988 989 990
  return ((row - range) >= mv_limits->row_min) &
         ((row + range) <= mv_limits->row_max) &
         ((col - range) >= mv_limits->col_min) &
         ((col + range) <= mv_limits->col_max);
Jingning Han's avatar
Jingning Han committed
991 992
}

Alex Converse's avatar
Alex Converse committed
993 994 995
// Returns non-zero when *mv lies within mv_limits; all four bounds are
// inclusive.
static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
  return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
         (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
}

998 999 1000 1001 1002 1003 1004 1005 1006 1007
// Candidate-acceptance step for the SAD-based searches.
//
// Expects these names in the invoking scope: thissad (candidate SAD), bestsad,
// best_site, i (candidate index), use_mvcost, x, this_mv, fcenter_mv and
// sad_per_bit. The motion vector cost is only added once the raw SAD already
// beats bestsad, so mvsad_err_cost() is skipped for losing candidates. The
// candidate is accepted when the resulting thissad is still below bestsad.
//
// Kept as a brace block rather than do { } while (0), so it stays valid when
// invoked as a bare CHECK_BETTER with no trailing semicolon.
#define CHECK_BETTER                                                      \
  {                                                                       \
    if (thissad < bestsad) {                                              \
      if (use_mvcost)                                                     \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      if (thissad < bestsad) {                                            \
        bestsad = thissad;                                                \
        best_site = i;                                                    \
      }                                                                   \
    }                                                                     \
  }

1010 1011 1012
#define MAX_PATTERN_SCALES 11
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
Jingning Han's avatar
Jingning Han committed
1013 1014 1015

// Calculate and return a sad+mvcost list around an integer best pel.
static INLINE void calc_int_cost_list(const MACROBLOCK *x,
1016
                                      const MV *const ref_mv, int sadpb,
Yaowu Xu's avatar
Yaowu Xu committed
1017
                                      const aom_variance_fn_ptr_t *fn_ptr,
1018 1019
                                      const MV *best_mv, int *cost_list) {
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
Jingning Han's avatar
Jingning Han committed
1020 1021
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
1022
  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
hui su's avatar
hui su committed
1023 1024
  const int br = best_mv->row;
  const int bc = best_mv->col;
Jingning Han's avatar
Jingning Han committed
1025 1026
  int i;
  unsigned int sse;
1027
  const MV this_mv = { br, bc };
Jingning Han's avatar
Jingning Han committed
1028

1029 1030 1031
  cost_list[0] =
      fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
                 in_what->stride, &sse) +
Jingning Han's avatar
Jingning Han committed
1032
      mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
Alex Converse's avatar
Alex Converse committed
1033
  if (check_bounds(&x->mv_limits, br, bc, 1)) {
Jingning Han's avatar
Jingning Han committed
1034
    for (i = 0; i < 4; i++) {
1035
      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
Jingning Han's avatar
Jingning Han committed
1036
      cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
1037
                                    get_buf_from_mv(in_what, &neighbor_mv),
Jingning Han's avatar
Jingning Han committed
1038
                                    in_what->stride, &sse) +
1039
                         mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost,
1040
                                     x->mvcost, x->errorperbit);
Jingning Han's avatar
Jingning Han committed
1041 1042 1043
    }
  } else {
    for (i = 0; i < 4; i++) {
1044
      const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
Alex Converse's avatar
Alex Converse committed
1045
      if (!is_mv_in(&x->mv_limits, &neighbor_mv))
Jingning Han's avatar
Jingning Han committed
1046 1047
        cost_list[i + 1] = INT_MAX;
      else
1048 1049 1050 1051 1052 1053
        cost_list[i + 1] =
            fn_ptr->vf(what->buf, what->stride,
                       get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
                       &sse) +
            mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
                        x->errorperbit);
Jingning Han's avatar
Jingning Han committed
1054 1055 1056 1057
    }
  }
}

hui su's avatar
hui su committed
1058
static INLINE void calc_int_sad_list(const MACROBLOCK