vp9_mcomp.c 68.2 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10
 */

Dmitry Kovalev's avatar
Dmitry Kovalev committed
11 12
#include <limits.h>
#include <math.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
13
#include <stdio.h>
John Koleszar's avatar
John Koleszar committed
14

15
#include "./vpx_config.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
16 17 18

#include "vpx_mem/vpx_mem.h"

Ronald S. Bultje's avatar
Ronald S. Bultje committed
19
#include "vp9/common/vp9_common.h"
John Koleszar's avatar
John Koleszar committed
20

Dmitry Kovalev's avatar
Dmitry Kovalev committed
21 22 23
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_mcomp.h"

24 25
// #define NEW_DIAMOND_SEARCH

Dmitry Kovalev's avatar
Dmitry Kovalev committed
26
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
27 28 29 30 31 32 33 34 35
  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;

  col_min = MAX(col_min, (MV_LOW >> 3) + 1);
  row_min = MAX(row_min, (MV_LOW >> 3) + 1);
  col_max = MIN(col_max, (MV_UPP >> 3) - 1);
  row_max = MIN(row_max, (MV_UPP >> 3) - 1);
36 37 38

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
39 40 41 42 43 44 45 46 47 48
  if (x->mv_col_min < col_min)
    x->mv_col_min = col_min;
  if (x->mv_col_max > col_max)
    x->mv_col_max = col_max;
  if (x->mv_row_min < row_min)
    x->mv_row_min = row_min;
  if (x->mv_row_max > row_max)
    x->mv_row_max = row_max;
}

49
int vp9_init_search_range(VP9_COMP *cpi, int size) {
50 51
  int sr = 0;

Paul Wilkins's avatar
Paul Wilkins committed
52 53 54
  // Minimum search size no matter what the passed in value.
  size = MAX(16, size);

55
  while ((size << sr) < MAX_FULL_PEL_VAL)
56 57
    sr++;

58 59
  sr += cpi->sf.reduce_first_step_size;
  sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
60 61 62
  return sr;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
63 64 65 66
static INLINE int mv_cost(const MV *mv,
                          const int *joint_cost, int *comp_cost[2]) {
  return joint_cost[vp9_get_mv_joint(mv)] +
             comp_cost[0][mv->row] + comp_cost[1][mv->col];
John Koleszar's avatar
John Koleszar committed
67 68
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
69 70 71 72 73 74 75 76 77
int vp9_mv_bit_cost(const MV *mv, const MV *ref,
                    const int *mvjcost, int *mvcost[2], int weight) {
  const MV diff = { mv->row - ref->row,
                    mv->col - ref->col };
  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}

static int mv_err_cost(const MV *mv, const MV *ref,
                       const int *mvjcost, int *mvcost[2],
78
                       int error_per_bit) {
79
  if (mvcost) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
80 81 82 83
    const MV diff = { mv->row - ref->row,
                      mv->col - ref->col };
    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
                                  error_per_bit, 13);
84
  }
John Koleszar's avatar
John Koleszar committed
85
  return 0;
86 87
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
88 89 90
static int mvsad_err_cost(const MV *mv, const MV *ref,
                          const int *mvjsadcost, int *mvsadcost[2],
                          int error_per_bit) {
91
  if (mvsadcost) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
92 93 94 95
    const MV diff = { mv->row - ref->row,
                      mv->col - ref->col };
    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
                                  error_per_bit, 8);
96
  }
John Koleszar's avatar
John Koleszar committed
97
  return 0;
John Koleszar's avatar
John Koleszar committed
98 99
}

100
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
101
  int len;
John Koleszar's avatar
John Koleszar committed
102
  int search_site_count = 0;
John Koleszar's avatar
John Koleszar committed
103

John Koleszar's avatar
John Koleszar committed
104 105 106 107 108 109
  // Generate offsets for 4 search sites per step.
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
110
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
John Koleszar's avatar
John Koleszar committed
111 112
    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
113 114
    x->ss[search_site_count].mv.row = -len;
    x->ss[search_site_count].offset = -len * stride;
John Koleszar's avatar
John Koleszar committed
115 116 117
    search_site_count++;

    // Compute offsets for search sites.
John Koleszar's avatar
John Koleszar committed
118
    x->ss[search_site_count].mv.col = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
119 120
    x->ss[search_site_count].mv.row = len;
    x->ss[search_site_count].offset = len * stride;
John Koleszar's avatar
John Koleszar committed
121 122 123
    search_site_count++;

    // Compute offsets for search sites.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
124
    x->ss[search_site_count].mv.col = -len;
John Koleszar's avatar
John Koleszar committed
125
    x->ss[search_site_count].mv.row = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
126
    x->ss[search_site_count].offset = -len;
John Koleszar's avatar
John Koleszar committed
127 128
    search_site_count++;

John Koleszar's avatar
John Koleszar committed
129
    // Compute offsets for search sites.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
130
    x->ss[search_site_count].mv.col = len;
John Koleszar's avatar
John Koleszar committed
131
    x->ss[search_site_count].mv.row = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
132
    x->ss[search_site_count].offset = len;
John Koleszar's avatar
John Koleszar committed
133 134 135 136 137
    search_site_count++;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 4;
John Koleszar's avatar
John Koleszar committed
138 139
}

140
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
141
  int len, ss_count = 1;
John Koleszar's avatar
John Koleszar committed
142

143 144
  x->ss[0].mv.col = x->ss[0].mv.row = 0;
  x->ss[0].offset = 0;
John Koleszar's avatar
John Koleszar committed
145

Dmitry Kovalev's avatar
Dmitry Kovalev committed
146
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
147 148
    // Generate offsets for 8 search sites per step.
    const MV ss_mvs[8] = {
149 150
      {-len,  0  }, {len,  0  }, { 0,   -len}, {0,    len},
      {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
151 152 153 154 155 156 157
    };
    int i;
    for (i = 0; i < 8; ++i) {
      search_site *const ss = &x->ss[ss_count++];
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
John Koleszar's avatar
John Koleszar committed
158 159
  }

160
  x->ss_count = ss_count;
John Koleszar's avatar
John Koleszar committed
161
  x->searches_per_step = 8;
John Koleszar's avatar
John Koleszar committed
162 163
}

164 165 166 167 168 169 170 171 172
/*
 * To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */
173

174
/* estimated cost of a motion vector (r,c) */
175 176 177 178
#define MVC(r, c)                                       \
    (mvcost ?                                           \
     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
179 180
      error_per_bit + 4096) >> 13 : 0)

181

182 183 184 185
// convert motion vector component to offset for svf calc
static INLINE int sp(int x) {
  return (x & 7) << 1;
}
186

187 188 189 190 191
#define IFMVCV(r, c, s, e)                                \
    if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
      s                                                   \
    else                                                  \
      e;
192

193 194 195
static INLINE uint8_t *pre(uint8_t *buf, int stride, int r, int c, int offset) {
  return &buf[(r >> 3) * stride + (c >> 3) - offset];
}
196

197
/* returns subpixel variance error function */
198
#define DIST(r, c) \
199 200
    vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \
             src_stride, &sse)
201 202 203 204 205 206 207 208 209 210 211 212 213 214

/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
    IFMVCV(r, c, {                                                       \
      thismse = (DIST(r, c));                                            \
      if ((v = MVC(r, c) + thismse) < besterr) {                         \
        besterr = v;                                                     \
        br = r;                                                          \
        bc = c;                                                          \
        *distortion = thismse;                                           \
        *sse1 = sse;                                                     \
      }                                                                  \
    },                                                                   \
    v = INT_MAX;)
215

216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
#define FIRST_LEVEL_CHECKS                              \
  {                                                     \
    unsigned int left, right, up, down, diag;           \
    CHECK_BETTER(left, tr, tc - hstep);                 \
    CHECK_BETTER(right, tr, tc + hstep);                \
    CHECK_BETTER(up, tr - hstep, tc);                   \
    CHECK_BETTER(down, tr + hstep, tc);                 \
    whichdir = (left < right ? 0 : 1) +                 \
               (up < down ? 0 : 2);                     \
    switch (whichdir) {                                 \
      case 0:                                           \
        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
        break;                                          \
      case 1:                                           \
        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
        break;                                          \
      case 2:                                           \
        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
        break;                                          \
      case 3:                                           \
        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
        break;                                          \
    }                                                   \
  }

#define SECOND_LEVEL_CHECKS                             \
  {                                                     \
    int kr, kc;                                         \
    unsigned int second;                                \
    if (tr != br && tc != bc) {                         \
      kr = br - tr;                                     \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
    } else if (tr == br && tc != bc) {                  \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 1:                                         \
          CHECK_BETTER(second, tr + hstep, tc + kc);    \
          break;                                        \
        case 2:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr - hstep, tc + kc);    \
          break;                                        \
      }                                                 \
    } else if (tr != br && tc == bc) {                  \
      kr = br - tr;                                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 2:                                         \
          CHECK_BETTER(second, tr + kr, tc + hstep);    \
          break;                                        \
        case 1:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr + kr, tc - hstep);    \
          break;                                        \
      }                                                 \
    }                                                   \
  }

int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
282
                                 MV *bestmv, const MV *ref_mv,
283
                                 int allow_hp,
284 285 286 287 288 289 290 291
                                 int error_per_bit,
                                 const vp9_variance_fn_ptr_t *vfp,
                                 int forced_stop,
                                 int iters_per_step,
                                 int *mvjcost, int *mvcost[2],
                                 int *distortion,
                                 unsigned int *sse1) {
  uint8_t *z = x->plane[0].src.buf;
292
  const int src_stride = x->plane[0].src.stride;
293 294 295 296 297 298 299 300 301
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;

302
  const int y_stride = xd->plane[0].pre[0].stride;
303
  const int offset = bestmv->row * y_stride + bestmv->col;
304
  uint8_t *y = xd->plane[0].pre[0].buf + offset;
305

306 307 308 309
  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
310
  int hstep = 4;
311 312 313 314
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
315

316 317
  int tr = br;
  int tc = bc;
318 319

  // central mv
320 321
  bestmv->row *= 8;
  bestmv->col *= 8;
322 323 324 325

  // calculate central point error
  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
  *distortion = besterr;
326
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346

  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  tr = br;
  tc = bc;

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

347
  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
348 349 350 351 352 353 354 355 356
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

357 358
  bestmv->row = br;
  bestmv->col = bc;
359

360 361
  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
362 363 364 365 366
    return INT_MAX;

  return besterr;
}

367 368 369
#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
370
    vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \
371 372
              z, src_stride, &sse, second_pred)

373
int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
374
                                      MV *bestmv, const MV *ref_mv,
375
                                      int allow_hp,
376 377 378 379 380 381 382 383 384 385
                                      int error_per_bit,
                                      const vp9_variance_fn_ptr_t *vfp,
                                      int forced_stop,
                                      int iters_per_step,
                                      int *mvjcost, int *mvcost[2],
                                      int *distortion,
                                      unsigned int *sse1,
                                      const uint8_t *second_pred,
                                      int w, int h) {
  uint8_t *z = x->plane[0].src.buf;
386
  const int src_stride = x->plane[0].src.stride;
387 388 389 390 391 392 393 394 395 396
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;

  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
397
  const int y_stride = xd->plane[0].pre[0].stride;
398
  const int offset = bestmv->row * y_stride + bestmv->col;
399
  uint8_t *y = xd->plane[0].pre[0].buf + offset;
400

401 402 403 404
  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
405
  int hstep = 4;
406 407 408 409
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
410

411 412
  int tr = br;
  int tc = bc;
413 414

  // central mv
415 416
  bestmv->row *= 8;
  bestmv->col *= 8;
417 418 419 420 421 422 423

  // calculate central point error
  // TODO(yunqingwang): central pointer error was already calculated in full-
  // pixel search, and can be passed in this function.
  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
  *distortion = besterr;
424
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449

  // Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

450
  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
451 452 453 454 455 456 457 458
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
459 460
  bestmv->row = br;
  bestmv->col = bc;
461

462 463
  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
464 465 466 467
    return INT_MAX;

  return besterr;
}
468

John Koleszar's avatar
John Koleszar committed
469 470 471
#undef MVC
#undef PRE
#undef DIST
472
#undef IFMVCV
John Koleszar's avatar
John Koleszar committed
473
#undef CHECK_BETTER
474 475
#undef SP

476 477
static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
                               int range) {
478 479 480 481 482
  return ((row - range) >= x->mv_row_min) &
         ((row + range) <= x->mv_row_max) &
         ((col - range) >= x->mv_col_min) &
         ((col + range) <= x->mv_col_max);
}
Yunqing Wang's avatar
Yunqing Wang committed
483

484 485 486 487 488 489
static INLINE int check_point(const MACROBLOCK *x, const MV *mv) {
  return (mv->col < x->mv_col_min) |
         (mv->col > x->mv_col_max) |
         (mv->row < x->mv_row_min) |
         (mv->row > x->mv_row_max);
}
Yunqing Wang's avatar
Yunqing Wang committed
490 491

#define CHECK_BETTER \
John Koleszar's avatar
John Koleszar committed
492
  {\
Yunqing Wang's avatar
Yunqing Wang committed
493 494
    if (thissad < bestsad)\
    {\
495
      if (use_mvcost) \
496
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
497 498
                                  mvjsadcost, mvsadcost, \
                                  sad_per_bit);\
John Koleszar's avatar
John Koleszar committed
499 500 501 502 503
      if (thissad < bestsad)\
      {\
        bestsad = thissad;\
        best_site = i;\
      }\
Yunqing Wang's avatar
Yunqing Wang committed
504
    }\
John Koleszar's avatar
John Koleszar committed
505 506
  }

507 508 509 510 511 512 513 514 515 516 517 518 519 520
#define get_next_chkpts(list, i, n)   \
    list[0] = ((i) == 0 ? (n) - 1 : (i) - 1);  \
    list[1] = (i);                             \
    list[2] = ((i) == (n) - 1 ? 0 : (i) + 1);

#define MAX_PATTERN_SCALES         11
#define MAX_PATTERN_CANDIDATES      8  // max number of canddiates per scale
#define PATTERN_CANDIDATES_REF      3  // number of refinement candidates

// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
static int vp9_pattern_search(MACROBLOCK *x,
521
                              MV *ref_mv,
522 523 524 525 526 527
                              int search_param,
                              int sad_per_bit,
                              int do_init_search,
                              int do_refine,
                              const vp9_variance_fn_ptr_t *vfp,
                              int use_mvcost,
528
                              const MV *center_mv, MV *best_mv,
529 530 531
                              const int num_candidates[MAX_PATTERN_SCALES],
                              const MV candidates[MAX_PATTERN_SCALES]
                                                 [MAX_PATTERN_CANDIDATES]) {
532
  const MACROBLOCKD* const xd = &x->e_mbd;
533 534 535 536
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, j, s, t;
John Koleszar's avatar
John Koleszar committed
537 538
  uint8_t *what = x->plane[0].src.buf;
  int what_stride = x->plane[0].src.stride;
539
  int in_what_stride = xd->plane[0].pre[0].stride;
John Koleszar's avatar
John Koleszar committed
540
  int br, bc;
541
  MV this_mv;
542 543
  int bestsad = INT_MAX;
  int thissad;
544 545
  uint8_t *base_offset;
  uint8_t *this_offset;
John Koleszar's avatar
John Koleszar committed
546 547
  int k = -1;
  int best_site = -1;
548
  MV fcenter_mv;
549 550 551 552
  int best_init_s = search_param_to_steps[search_param];
  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

553 554
  fcenter_mv.row = center_mv->row >> 3;
  fcenter_mv.col = center_mv->col >> 3;
John Koleszar's avatar
John Koleszar committed
555 556

  // adjust ref_mv to make sure it is within MV range
557 558 559
  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  br = ref_mv->row;
  bc = ref_mv->col;
John Koleszar's avatar
John Koleszar committed
560 561

  // Work out the start point for the search
562
  base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
563
  this_offset = base_offset + (br * in_what_stride) + bc;
564 565
  this_mv.row = br;
  this_mv.col = bc;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
566
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
567
                + mvsad_err_cost(&this_mv, &fcenter_mv,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
568
                                 mvjsadcost, mvsadcost, sad_per_bit);
John Koleszar's avatar
John Koleszar committed
569

570 571 572 573 574 575 576 577
  // Search all possible scales upto the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      best_site = -1;
578
      if (check_bounds(x, br, bc, 1 << t)) {
579
        for (i = 0; i < num_candidates[t]; i++) {
580 581 582 583
          this_mv.row = br + candidates[t][i].row;
          this_mv.col = bc + candidates[t][i].col;
          this_offset = base_offset + (this_mv.row * in_what_stride) +
                                       this_mv.col;
584 585 586 587 588 589
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
590 591
          this_mv.row = br + candidates[t][i].row;
          this_mv.col = bc + candidates[t][i].col;
592 593
          if (check_point(x, &this_mv))
            continue;
594 595
          this_offset = base_offset + (this_mv.row * in_what_stride) +
                                       this_mv.col;
596 597 598 599 600 601 602 603 604 605 606
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
John Koleszar's avatar
John Koleszar committed
607
    }
608 609 610
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
John Koleszar's avatar
John Koleszar committed
611 612 613
    }
  }

614 615 616 617
  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    s = best_init_s;
John Koleszar's avatar
John Koleszar committed
618
    best_site = -1;
619 620 621
    do {
      // No need to search all 6 points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
622
        if (check_bounds(x, br, bc, 1 << s)) {
623
          for (i = 0; i < num_candidates[s]; i++) {
624 625 626 627
            this_mv.row = br + candidates[s][i].row;
            this_mv.col = bc + candidates[s][i].col;
            this_offset = base_offset + (this_mv.row * in_what_stride) +
                                         this_mv.col;
628 629 630 631 632 633
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
634 635
            this_mv.row = br + candidates[s][i].row;
            this_mv.col = bc + candidates[s][i].col;
636 637
            if (check_point(x, &this_mv))
              continue;
638 639
            this_offset = base_offset + (this_mv.row * in_what_stride) +
                                         this_mv.col;
640 641 642 643 644 645 646 647 648 649 650 651 652
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
John Koleszar's avatar
John Koleszar committed
653
      }
654

655 656 657 658
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
659
        if (check_bounds(x, br, bc, 1 << s)) {
660
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
661 662 663 664
            this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
            this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
            this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                         this_mv.col;
665 666 667 668 669 670
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
671 672
            this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
            this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
673 674
            if (check_point(x, &this_mv))
              continue;
675 676
            this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                         this_mv.col;
677 678 679 680 681 682 683 684 685 686 687 688 689
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    } while (s--);
John Koleszar's avatar
John Koleszar committed
690
  }
691

692 693 694 695 696 697 698 699
  // Check 4 1-away neighbors if do_refine is true.
  // For most well-designed schemes do_refine will not be necessary.
  if (do_refine) {
    static const MV neighbors[4] = {
      {0, -1}, { -1, 0}, {1, 0}, {0, 1},
    };
    for (j = 0; j < 16; j++) {
      best_site = -1;
700
      if (check_bounds(x, br, bc, 1)) {
701
        for (i = 0; i < 4; i++) {
702 703 704 705
          this_mv.row = br + neighbors[i].row;
          this_mv.col = bc + neighbors[i].col;
          this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                       this_mv.col;
706 707 708 709 710 711
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < 4; i++) {
712 713
          this_mv.row = br + neighbors[i].row;
          this_mv.col = bc + neighbors[i].col;
714 715
          if (check_point(x, &this_mv))
            continue;
716 717
          this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                       this_mv.col;
718 719 720 721 722
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
          }
Yunqing Wang's avatar
Yunqing Wang committed
723

724 725 726 727 728 729
      if (best_site == -1) {
        break;
      } else {
        br += neighbors[best_site].row;
        bc += neighbors[best_site].col;
      }
John Koleszar's avatar
John Koleszar committed
730
    }
John Koleszar's avatar
John Koleszar committed
731
  }
John Koleszar's avatar
John Koleszar committed
732

733 734
  best_mv->row = br;
  best_mv->col = bc;
John Koleszar's avatar
John Koleszar committed
735

736 737 738 739
  this_offset = base_offset + (best_mv->row * in_what_stride) +
                               best_mv->col;
  this_mv.row = best_mv->row * 8;
  this_mv.col = best_mv->col * 8;
740 741
  if (bestsad == INT_MAX)
    return INT_MAX;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
742 743 744

  return vfp->vf(what, what_stride, this_offset, in_what_stride,
                 (unsigned int *)&bestsad) +
745
         use_mvcost ? mv_err_cost(&this_mv, center_mv,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
746 747
                                  x->nmvjointcost, x->mvcost, x->errorperbit)
                    : 0;
748 749 750 751
}


int vp9_hex_search(MACROBLOCK *x,
752
                   MV *ref_mv,
753 754 755 756 757
                   int search_param,
                   int sad_per_bit,
                   int do_init_search,
                   const vp9_variance_fn_ptr_t *vfp,
                   int use_mvcost,
758
                   const MV *center_mv, MV *best_mv) {
759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
  // First scale has 8-closest points, the rest have 6 points in hex shape
  // at increasing scales
  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
      { -1024, 0}},
  };
  return
      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                         do_init_search, 0, vfp, use_mvcost,
                         center_mv, best_mv,
                         hex_num_candidates, hex_candidates);
John Koleszar's avatar
John Koleszar committed
784
}
785 786

int vp9_bigdia_search(MACROBLOCK *x,
787
                      MV *ref_mv,
788 789 790 791 792
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
793 794
                      const MV *center_mv,
                      MV *best_mv) {
795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820
  // First scale has 4-closest points, the rest have 8 points in diamond
  // shape at increasing scales
  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
      {-16, 16}, {-32, 0}},
    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
      {-32, 32}, {-64, 0}},
    {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
      {-64, 64}, {-128, 0}},
    {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
      {-128, 128}, {-256, 0}},
    {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
      {-256, 256}, {-512, 0}},
    {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
      {-512, 512}, {-1024, 0}},
  };
821 822 823 824
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, 0, vfp, use_mvcost,
                            center_mv, best_mv,
                            bigdia_num_candidates, bigdia_candidates);
825 826 827
}

int vp9_square_search(MACROBLOCK *x,
828
                      MV *ref_mv,
829 830 831 832 833
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
834 835
                      const MV *center_mv,
                      MV *best_mv) {
836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
  // All scales have 8 closest points in square shape
  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV square_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
    {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
    {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
    {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
    {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
      {-16, 16}, {-16, 0}},
    {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
      {-32, 32}, {-32, 0}},
    {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
      {-64, 64}, {-64, 0}},
    {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
      {-128, 128}, {-128, 0}},
    {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
      {-256, 256}, {-256, 0}},
    {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
      {-512, 512}, {-512, 0}},
    {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
      {0, 1024}, {-1024, 1024}, {-1024, 0}},
  };
862 863 864 865
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, 0, vfp, use_mvcost,
                            center_mv, best_mv,
                            square_num_candidates, square_candidates);
866 867
};

John Koleszar's avatar
John Koleszar committed
868
#undef CHECK_BETTER
869

870
int vp9_full_range_search_c(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
871 872
                            int search_param, int sad_per_bit, int *num00,
                            vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
873
                            int *mvcost[2], const MV *center_mv) {
874 875 876 877 878 879 880
  const MACROBLOCKD* const xd = &x->e_mbd;
  uint8_t *what = x->plane[0].src.buf;
  int what_stride = x->plane[0].src.stride;
  uint8_t *in_what;
  int in_what_stride = xd->plane[0].pre[0].stride;
  uint8_t *best_address;

881
  MV this_mv;
882 883 884 885 886 887

  int bestsad = INT_MAX;
  int ref_row, ref_col;

  uint8_t *check_here;
  int thissad;
888
  MV fcenter_mv;
889 890 891 892 893 894 895 896 897 898 899 900 901

  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

  int tr, tc;
  int best_tr = 0;
  int best_tc = 0;
  int range = 64;

  int start_col, end_col;
  int start_row, end_row;
  int i;

902 903
  fcenter_mv.row = center_mv->row >> 3;
  fcenter_mv.col = center_mv->col >> 3;
904

905 906 907
  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  ref_row = ref_mv->row;
  ref_col = ref_mv->col;
908
  *num00 = 11;
909 910
  best_mv->row = ref_row;
  best_mv->col = ref_col;
911 912 913 914 915 916 917 918

  // Work out the start point for the search
  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
  best_address = in_what;

  // Check the starting position
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
919
                + mvsad_err_cost(best_mv, &fcenter_mv,
920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938
                                 mvjsadcost, mvsadcost, sad_per_bit);

  start_row = MAX(-range, x->mv_row_min - ref_row);
  start_col = MAX(-range, x->mv_col_min - ref_col);
  end_row = MIN(range, x->mv_row_max - ref_row);
  end_col = MIN(range, x->mv_col_max - ref_col);

  for (tr = start_row; tr <= end_row; ++tr) {
    for (tc = start_col; tc <= end_col; tc += 4) {
      if ((tc + 3) <= end_col) {
        unsigned int sad_array[4];
        unsigned char const *addr_ref[4];
        for (i = 0; i < 4; ++i)
          addr_ref[i] = in_what + tr * in_what_stride + tc + i;

        fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array);

        for (i = 0; i < 4; ++i) {
          if (sad_array[i] < bestsad) {
939 940
            this_mv.row = ref_row + tr;
            this_mv.col = ref_col + tc + i;
941
            thissad = sad_array[i] +
942
                      mvsad_err_cost(&this_mv, &fcenter_mv,
943 944 945 946 947 948 949 950 951 952 953 954 955 956 957
                                      mvjsadcost, mvsadcost, sad_per_bit);
            if (thissad < bestsad) {
              bestsad = thissad;
              best_tr = tr;
              best_tc = tc + i;
            }
          }
        }
      } else {
        for (i = 0; i < end_col - tc; ++i) {
          check_here = in_what + tr * in_what_stride + tc + i;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
                                bestsad);

          if (thissad < bestsad) {
958 959 960
            this_mv.row = ref_row + tr;
            this_mv.col = ref_col + tc + i;
            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
961 962 963 964 965 966 967 968 969 970 971 972 973
                                      mvjsadcost, mvsadcost, sad_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_tr = tr;
              best_tc = tc + i;
            }
          }
        }
      }
    }
  }

974 975
  best_mv->row += best_tr;
  best_mv->col += best_tc;
976

977 978
  this_mv.row = best_mv->row * 8;
  this_mv.col = best_mv->col * 8;
979 980 981 982 983 984

  if (bestsad == INT_MAX)
    return INT_MAX;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
                    (unsigned int *)(&thissad)) +
985
                       mv_err_cost(&this_mv, center_mv,
986 987 988
                                   mvjcost, mvcost, x->errorperbit);
}

John Koleszar's avatar
John Koleszar committed
989
int vp9_diamond_search_sad_c(MACROBLOCK *x,
990
                             MV *ref_mv, MV *best_mv,
Jim Bankoski's avatar
Jim Bankoski committed
991
                             int search_param, int sad_per_bit, int *num00,
992
                             vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
993
                             int *mvcost[2], const MV *center_mv) {
John Koleszar's avatar
John Koleszar committed
994 995
  int i, j, step;

996
  const MACROBLOCKD* const xd = &x->e_mbd;
John Koleszar's avatar
John Koleszar committed
997 998
  uint8_t *what = x->plane[0].src.buf;
  int what_stride = x->plane[0].src.stride;
999
  uint8_t *in_what;
1000
  int in_what_stride = xd->plane[0].pre[0].stride;
1001
  uint8_t *best_address;
John Koleszar's avatar
John Koleszar committed
1002 1003

  int tot_steps;
1004
  MV this_mv;
John Koleszar's avatar
John Koleszar committed
1005 1006 1007 1008 1009

  int bestsad = INT_MAX;
  int best_site = 0;
  int last_site = 0;

1010 1011
  int ref_row, ref_col;
  int this_row_offset, this_col_offset;
John Koleszar's avatar
John Koleszar committed
1012 1013
  search_site *ss;

1014
  uint8_t *check_here;
John Koleszar's avatar
John Koleszar committed
1015
  int thissad;
1016
  MV fcenter_mv;
1017

1018 1019 1020
  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

1021 1022
  fcenter_mv.row = center_mv->row >> 3;
  fcenter_mv.col = center_mv->col >> 3;
John Koleszar's avatar
John Koleszar committed
1023

1024 1025 1026
  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  ref_row = ref_mv->row;
  ref_col = ref_mv->col;
John Koleszar's avatar
John Koleszar committed
1027
  *num00 = 0;
1028 1029
  best_mv->row = ref_row;
  best_mv->col = ref_col;
John Koleszar's avatar
John Koleszar committed
1030 1031

  // Work out the start point for the search
1032
  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
Jingning Han's avatar
Jingning Han committed
1033
                        ref_row * in_what_stride + ref_col);
John Koleszar's avatar
John Koleszar committed
1034 1035 1036
  best_address = in_what;

  // Check the starting position
Dmitry Kovalev's avatar
Dmitry Kovalev committed
1037
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
1038
                + mvsad_err_cost(best_mv, &fcenter_mv,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
1039
                                 mvjsadcost, mvsadcost, sad_per_bit);
John Koleszar's avatar
John Koleszar committed
1040

1041 1042 1043 1044
  // search_param determines the length of the initial step and hence the number
  // of iterations
  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
  // (MAX_FIRST_STEP/4) pel... etc.
John Koleszar's avatar
John Koleszar committed
1045 1046 1047 1048 1049 1050 1051 1052
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; step++) {
    for (j = 0; j < x->searches_per_step; j++) {
      // Trap illegal vectors
1053 1054
      this_row_offset = best_mv->row + ss[i].mv.row;
      this_col_offset = best_mv->col + ss[i].mv.col;
John Koleszar's avatar
John Koleszar committed
1055

1056 1057 1058 1059
      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
John Koleszar's avatar
John Koleszar committed
1060
        check_here = ss[i].offset + best_address;
1061 1062
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
                              bestsad);
John Koleszar's avatar
John Koleszar committed
1063 1064

        if (thissad < bestsad) {
1065 1066
          this_mv.row = this_row_offset;
          this_mv.col = this_col_offset;
1067
          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1068
                                    mvjsadcost, mvsadcost, sad_per_bit);
John Koleszar's avatar
John Koleszar committed
1069 1070 1071 1072 1073

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
John Koleszar's avatar
John Koleszar committed
1074
        }