vp9_mcomp.c 72.2 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
 */

Dmitry Kovalev's avatar
Dmitry Kovalev committed
11
12
#include <limits.h>
#include <math.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
13
#include <stdio.h>
John Koleszar's avatar
John Koleszar committed
14

15
#include "./vpx_config.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
16
17
18

#include "vpx_mem/vpx_mem.h"

19
#include "vp9/common/vp9_findnearmv.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
20
#include "vp9/common/vp9_common.h"
John Koleszar's avatar
John Koleszar committed
21

Dmitry Kovalev's avatar
Dmitry Kovalev committed
22
23
24
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_mcomp.h"

25
26
// #define NEW_DIAMOND_SEARCH

27
28
29
30
31
32
33
34
void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) {
  const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
35
36
37
38
39
40
41
42
43
44
  if (x->mv_col_min < col_min)
    x->mv_col_min = col_min;
  if (x->mv_col_max > col_max)
    x->mv_col_max = col_max;
  if (x->mv_row_min < row_min)
    x->mv_row_min = row_min;
  if (x->mv_row_max > row_max)
    x->mv_row_max = row_max;
}

45
int vp9_init_search_range(VP9_COMP *cpi, int size) {
46
47
  int sr = 0;

Paul Wilkins's avatar
Paul Wilkins committed
48
49
50
  // Minimum search size no matter what the passed in value.
  size = MAX(16, size);

51
  while ((size << sr) < MAX_FULL_PEL_VAL)
52
53
    sr++;

54
55
  sr += cpi->sf.reduce_first_step_size;
  sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
56
57
58
  return sr;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
59
60
61
62
static INLINE int mv_cost(const MV *mv,
                          const int *joint_cost, int *comp_cost[2]) {
  return joint_cost[vp9_get_mv_joint(mv)] +
             comp_cost[0][mv->row] + comp_cost[1][mv->col];
John Koleszar's avatar
John Koleszar committed
63
64
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
65
66
67
68
69
70
71
72
73
int vp9_mv_bit_cost(const MV *mv, const MV *ref,
                    const int *mvjcost, int *mvcost[2], int weight) {
  const MV diff = { mv->row - ref->row,
                    mv->col - ref->col };
  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}

static int mv_err_cost(const MV *mv, const MV *ref,
                       const int *mvjcost, int *mvcost[2],
74
                       int error_per_bit) {
75
  if (mvcost) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
76
77
78
79
    const MV diff = { mv->row - ref->row,
                      mv->col - ref->col };
    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
                                  error_per_bit, 13);
80
  }
John Koleszar's avatar
John Koleszar committed
81
  return 0;
82
83
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
84
85
86
static int mvsad_err_cost(const MV *mv, const MV *ref,
                          const int *mvjsadcost, int *mvsadcost[2],
                          int error_per_bit) {
87
  if (mvsadcost) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
88
89
90
91
    const MV diff = { mv->row - ref->row,
                      mv->col - ref->col };
    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
                                  error_per_bit, 8);
92
  }
John Koleszar's avatar
John Koleszar committed
93
  return 0;
John Koleszar's avatar
John Koleszar committed
94
95
}

96
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
97
  int len;
John Koleszar's avatar
John Koleszar committed
98
  int search_site_count = 0;
John Koleszar's avatar
John Koleszar committed
99

John Koleszar's avatar
John Koleszar committed
100
101
102
103
104
105
  // Generate offsets for 4 search sites per step.
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
106
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
John Koleszar's avatar
John Koleszar committed
107
108
    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
109
110
    x->ss[search_site_count].mv.row = -len;
    x->ss[search_site_count].offset = -len * stride;
John Koleszar's avatar
John Koleszar committed
111
112
113
    search_site_count++;

    // Compute offsets for search sites.
John Koleszar's avatar
John Koleszar committed
114
    x->ss[search_site_count].mv.col = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
115
116
    x->ss[search_site_count].mv.row = len;
    x->ss[search_site_count].offset = len * stride;
John Koleszar's avatar
John Koleszar committed
117
118
119
    search_site_count++;

    // Compute offsets for search sites.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
120
    x->ss[search_site_count].mv.col = -len;
John Koleszar's avatar
John Koleszar committed
121
    x->ss[search_site_count].mv.row = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
122
    x->ss[search_site_count].offset = -len;
John Koleszar's avatar
John Koleszar committed
123
124
    search_site_count++;

John Koleszar's avatar
John Koleszar committed
125
    // Compute offsets for search sites.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
126
    x->ss[search_site_count].mv.col = len;
John Koleszar's avatar
John Koleszar committed
127
    x->ss[search_site_count].mv.row = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
128
    x->ss[search_site_count].offset = len;
John Koleszar's avatar
John Koleszar committed
129
130
131
132
133
    search_site_count++;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 4;
John Koleszar's avatar
John Koleszar committed
134
135
}

136
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
137
  int len, ss_count = 1;
John Koleszar's avatar
John Koleszar committed
138

139
140
  x->ss[0].mv.col = x->ss[0].mv.row = 0;
  x->ss[0].offset = 0;
John Koleszar's avatar
John Koleszar committed
141

Dmitry Kovalev's avatar
Dmitry Kovalev committed
142
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
143
144
    // Generate offsets for 8 search sites per step.
    const MV ss_mvs[8] = {
145
146
      {-len,  0  }, {len,  0  }, { 0,   -len}, {0,    len},
      {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
147
148
149
150
151
152
153
    };
    int i;
    for (i = 0; i < 8; ++i) {
      search_site *const ss = &x->ss[ss_count++];
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
John Koleszar's avatar
John Koleszar committed
154
155
  }

156
  x->ss_count = ss_count;
John Koleszar's avatar
John Koleszar committed
157
  x->searches_per_step = 8;
John Koleszar's avatar
John Koleszar committed
158
159
}

160
161
162
163
164
165
166
167
168
/*
 * To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */
169

170
/* estimated cost of a motion vector (r,c) */
171
172
173
174
#define MVC(r, c)                                       \
    (mvcost ?                                           \
     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
175
176
      error_per_bit + 4096) >> 13 : 0)

177

178
179
#define SP(x) (((x) & 7) << 1)  // convert motion vector component to offset
                                // for svf calc
180

181
182
183
184
185
#define IFMVCV(r, c, s, e)                                \
    if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
      s                                                   \
    else                                                  \
      e;
186
187

/* pointer to predictor base of a motionvector */
188
#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
189

190
/* returns subpixel variance error function */
191
#define DIST(r, c) \
John Koleszar's avatar
John Koleszar committed
192
    vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)
193
194
195
196
197
198
199
200
201
202
203
204
205
206

/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
    IFMVCV(r, c, {                                                       \
      thismse = (DIST(r, c));                                            \
      if ((v = MVC(r, c) + thismse) < besterr) {                         \
        besterr = v;                                                     \
        br = r;                                                          \
        bc = c;                                                          \
        *distortion = thismse;                                           \
        *sse1 = sse;                                                     \
      }                                                                  \
    },                                                                   \
    v = INT_MAX;)
207

208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#define FIRST_LEVEL_CHECKS                              \
  {                                                     \
    unsigned int left, right, up, down, diag;           \
    CHECK_BETTER(left, tr, tc - hstep);                 \
    CHECK_BETTER(right, tr, tc + hstep);                \
    CHECK_BETTER(up, tr - hstep, tc);                   \
    CHECK_BETTER(down, tr + hstep, tc);                 \
    whichdir = (left < right ? 0 : 1) +                 \
               (up < down ? 0 : 2);                     \
    switch (whichdir) {                                 \
      case 0:                                           \
        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
        break;                                          \
      case 1:                                           \
        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
        break;                                          \
      case 2:                                           \
        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
        break;                                          \
      case 3:                                           \
        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
        break;                                          \
    }                                                   \
  }

#define SECOND_LEVEL_CHECKS                             \
  {                                                     \
    int kr, kc;                                         \
    unsigned int second;                                \
    if (tr != br && tc != bc) {                         \
      kr = br - tr;                                     \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
    } else if (tr == br && tc != bc) {                  \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 1:                                         \
          CHECK_BETTER(second, tr + hstep, tc + kc);    \
          break;                                        \
        case 2:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr - hstep, tc + kc);    \
          break;                                        \
      }                                                 \
    } else if (tr != br && tc == bc) {                  \
      kr = br - tr;                                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 2:                                         \
          CHECK_BETTER(second, tr + kr, tc + hstep);    \
          break;                                        \
        case 1:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr + kr, tc - hstep);    \
          break;                                        \
      }                                                 \
    }                                                   \
  }

273
int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
274
                                      MV *bestmv, const MV *ref_mv,
275
                                      int allow_hp,
276
277
278
279
280
281
282
                                      int error_per_bit,
                                      const vp9_variance_fn_ptr_t *vfp,
                                      int forced_stop,
                                      int iters_per_step,
                                      int *mvjcost, int *mvcost[2],
                                      int *distortion,
                                      unsigned int *sse1) {
John Koleszar's avatar
John Koleszar committed
283
284
  uint8_t *z = x->plane[0].src.buf;
  int src_stride = x->plane[0].src.stride;
John Koleszar's avatar
John Koleszar committed
285
286
287
288
289
  MACROBLOCKD *xd = &x->e_mbd;

  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
290
291
292
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;
John Koleszar's avatar
John Koleszar committed
293
  int thismse;
John Koleszar's avatar
John Koleszar committed
294

Dmitry Kovalev's avatar
Dmitry Kovalev committed
295
  const int y_stride = xd->plane[0].pre[0].stride;
296
  const int offset = bestmv->row * y_stride + bestmv->col;
297
  uint8_t *y = xd->plane[0].pre[0].buf + offset;
298

299
300
301
302
  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
303
  int hstep = 4;
304
305
306
307
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
John Koleszar's avatar
John Koleszar committed
308

Dmitry Kovalev's avatar
Dmitry Kovalev committed
309
310
  int tr = br;
  int tc = bc;
John Koleszar's avatar
John Koleszar committed
311
312

  // central mv
313
314
  bestmv->row <<= 3;
  bestmv->col <<= 3;
John Koleszar's avatar
John Koleszar committed
315
316

  // calculate central point error
John Koleszar's avatar
John Koleszar committed
317
  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
John Koleszar's avatar
John Koleszar committed
318
  *distortion = besterr;
319
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
John Koleszar's avatar
John Koleszar committed
320

321
322
  // TODO(jbb): Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 if diagonal is selected.
323
  while (halfiters--) {
John Koleszar's avatar
John Koleszar committed
324
    // 1/2 pel
325
    FIRST_LEVEL_CHECKS;
John Koleszar's avatar
John Koleszar committed
326
327
328
329
330
331
    // no reason to check the same one again.
    if (tr == br && tc == bc)
      break;
    tr = br;
    tc = bc;
  }
John Koleszar's avatar
John Koleszar committed
332

333
334
  // TODO(yaowu): Each subsequent iteration checks at least one point in common
  // with the last iteration could be 2 if diagonal is selected.
John Koleszar's avatar
John Koleszar committed
335

336
337
338
339
  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    while (quarteriters--) {
340
      FIRST_LEVEL_CHECKS;
341
342
343
344
345
346
      // no reason to check the same one again.
      if (tr == br && tc == bc)
        break;
      tr = br;
      tc = bc;
    }
John Koleszar's avatar
John Koleszar committed
347
  }
John Koleszar's avatar
John Koleszar committed
348

349
  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
350
    hstep >>= 1;
351
    while (eighthiters--) {
352
      FIRST_LEVEL_CHECKS;
John Koleszar's avatar
John Koleszar committed
353
354
355
356
357
      // no reason to check the same one again.
      if (tr == br && tc == bc)
        break;
      tr = br;
      tc = bc;
358
    }
John Koleszar's avatar
John Koleszar committed
359
  }
360

361
362
  bestmv->row = br;
  bestmv->col = bc;
John Koleszar's avatar
John Koleszar committed
363

364
365
  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
John Koleszar's avatar
John Koleszar committed
366
    return INT_MAX;
John Koleszar's avatar
John Koleszar committed
367

John Koleszar's avatar
John Koleszar committed
368
  return besterr;
John Koleszar's avatar
John Koleszar committed
369
}
370

371
int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
372
                                 MV *bestmv, const MV *ref_mv,
373
                                 int allow_hp,
374
375
376
377
378
379
380
381
                                 int error_per_bit,
                                 const vp9_variance_fn_ptr_t *vfp,
                                 int forced_stop,
                                 int iters_per_step,
                                 int *mvjcost, int *mvcost[2],
                                 int *distortion,
                                 unsigned int *sse1) {
  uint8_t *z = x->plane[0].src.buf;
382
  const int src_stride = x->plane[0].src.stride;
383
384
385
386
387
388
389
390
391
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;

392
  const int y_stride = xd->plane[0].pre[0].stride;
393
  const int offset = bestmv->row * y_stride + bestmv->col;
394
  uint8_t *y = xd->plane[0].pre[0].buf + offset;
395

396
397
398
399
  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
400
  int hstep = 4;
401
402
403
404
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
405

406
407
  int tr = br;
  int tc = bc;
408
409

  // central mv
410
411
  bestmv->row *= 8;
  bestmv->col *= 8;
412
413
414
415

  // calculate central point error
  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
  *distortion = besterr;
416
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436

  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  tr = br;
  tc = bc;

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

437
  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
438
439
440
441
442
443
444
445
446
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

447
448
  bestmv->row = br;
  bestmv->col = bc;
449

450
451
  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
452
453
454
455
456
    return INT_MAX;

  return besterr;
}

457
458
459
460
461
462
#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
    vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
              z, src_stride, &sse, second_pred)

463
int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
464
                                           MV *bestmv, const MV *ref_mv,
465
                                           int allow_hp,
466
467
468
469
470
471
472
473
474
                                           int error_per_bit,
                                           const vp9_variance_fn_ptr_t *vfp,
                                           int forced_stop,
                                           int iters_per_step,
                                           int *mvjcost, int *mvcost[2],
                                           int *distortion,
                                           unsigned int *sse1,
                                           const uint8_t *second_pred,
                                           int w, int h) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
475
476
477
  uint8_t *const z = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  MACROBLOCKD *const xd = &x->e_mbd;
478
479
480
481

  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
482
483
484
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;
485
486
  int thismse;

487
  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
488
  const int y_stride = xd->plane[0].pre[0].stride;
489
  const int offset = bestmv->row * y_stride + bestmv->col;
490
  uint8_t *const y = xd->plane[0].pre[0].buf + offset;
491

492
493
494
495
  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
496
  int hstep = 4;
497
498
499
500
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
501

Dmitry Kovalev's avatar
Dmitry Kovalev committed
502
503
  int tr = br;
  int tc = bc;
504
505

  // central mv
506
507
  bestmv->row *= 8;
  bestmv->col *= 8;
508
509
510
511
512
513
514

  // calculate central point error
  // TODO(yunqingwang): central pointer error was already calculated in full-
  // pixel search, and can be passed in this function.
  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
  *distortion = besterr;
515
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
516
517
518

  // Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
519
  while (halfiters--) {
520
    // 1/2 pel
521
    FIRST_LEVEL_CHECKS;
522
523
524
525
526
527
528
529
530
531
    // no reason to check the same one again.
    if (tr == br && tc == bc)
      break;
    tr = br;
    tc = bc;
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

532
533
534
535
  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    while (quarteriters--) {
536
      FIRST_LEVEL_CHECKS;
537
538
539
540
541
542
      // no reason to check the same one again.
      if (tr == br && tc == bc)
        break;
      tr = br;
      tc = bc;
    }
543
544
  }

545
  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
546
    hstep >>= 1;
547
    while (eighthiters--) {
548
      FIRST_LEVEL_CHECKS;
549
550
551
552
553
554
555
      // no reason to check the same one again.
      if (tr == br && tc == bc)
        break;
      tr = br;
      tc = bc;
    }
  }
556
557
  bestmv->row = br;
  bestmv->col = bc;
558

559
560
  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
561
562
563
564
565
566
    return INT_MAX;

  return besterr;
}

int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
567
                                      MV *bestmv, const MV *ref_mv,
568
                                      int allow_hp,
569
570
571
572
573
574
575
576
577
578
                                      int error_per_bit,
                                      const vp9_variance_fn_ptr_t *vfp,
                                      int forced_stop,
                                      int iters_per_step,
                                      int *mvjcost, int *mvcost[2],
                                      int *distortion,
                                      unsigned int *sse1,
                                      const uint8_t *second_pred,
                                      int w, int h) {
  uint8_t *z = x->plane[0].src.buf;
579
  const int src_stride = x->plane[0].src.stride;
580
581
582
583
584
585
586
587
588
589
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;

  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
590
  const int y_stride = xd->plane[0].pre[0].stride;
591
  const int offset = bestmv->row * y_stride + bestmv->col;
592
  uint8_t *y = xd->plane[0].pre[0].buf + offset;
593

594
595
596
597
  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
598
  int hstep = 4;
599
600
601
602
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
603

604
605
  int tr = br;
  int tc = bc;
606
607

  // central mv
608
609
  bestmv->row *= 8;
  bestmv->col *= 8;
610
611
612
613
614
615
616

  // calculate central point error
  // TODO(yunqingwang): central pointer error was already calculated in full-
  // pixel search, and can be passed in this function.
  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
  *distortion = besterr;
617
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642

  // Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

643
  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
644
645
646
647
648
649
650
651
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
652
653
  bestmv->row = br;
  bestmv->col = bc;
654

655
656
  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
657
658
659
660
    return INT_MAX;

  return besterr;
}
661

John Koleszar's avatar
John Koleszar committed
662
663
664
#undef MVC
#undef PRE
#undef DIST
665
#undef IFMVCV
John Koleszar's avatar
John Koleszar committed
666
#undef CHECK_BETTER
667
668
#undef SP

Yunqing Wang's avatar
Yunqing Wang committed
669
#define CHECK_BOUNDS(range) \
John Koleszar's avatar
John Koleszar committed
670
  {\
Yunqing Wang's avatar
Yunqing Wang committed
671
672
673
674
675
    all_in = 1;\
    all_in &= ((br-range) >= x->mv_row_min);\
    all_in &= ((br+range) <= x->mv_row_max);\
    all_in &= ((bc-range) >= x->mv_col_min);\
    all_in &= ((bc+range) <= x->mv_col_max);\
John Koleszar's avatar
John Koleszar committed
676
  }
Yunqing Wang's avatar
Yunqing Wang committed
677
678

#define CHECK_POINT \
John Koleszar's avatar
John Koleszar committed
679
  {\
680
681
682
683
    if (this_mv.col < x->mv_col_min) continue;\
    if (this_mv.col > x->mv_col_max) continue;\
    if (this_mv.row < x->mv_row_min) continue;\
    if (this_mv.row > x->mv_row_max) continue;\
John Koleszar's avatar
John Koleszar committed
684
  }
Yunqing Wang's avatar
Yunqing Wang committed
685
686

#define CHECK_BETTER \
John Koleszar's avatar
John Koleszar committed
687
  {\
Yunqing Wang's avatar
Yunqing Wang committed
688
689
    if (thissad < bestsad)\
    {\
690
      if (use_mvcost) \
691
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \
692
693
                                  mvjsadcost, mvsadcost, \
                                  sad_per_bit);\
John Koleszar's avatar
John Koleszar committed
694
695
696
697
698
      if (thissad < bestsad)\
      {\
        bestsad = thissad;\
        best_site = i;\
      }\
Yunqing Wang's avatar
Yunqing Wang committed
699
    }\
John Koleszar's avatar
John Koleszar committed
700
701
  }

702
703
704
705
706
707
708
709
710
711
712
713
714
715
#define get_next_chkpts(list, i, n)   \
    list[0] = ((i) == 0 ? (n) - 1 : (i) - 1);  \
    list[1] = (i);                             \
    list[2] = ((i) == (n) - 1 ? 0 : (i) + 1);

#define MAX_PATTERN_SCALES         11
#define MAX_PATTERN_CANDIDATES      8  // max number of canddiates per scale
#define PATTERN_CANDIDATES_REF      3  // number of refinement candidates

// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
static int vp9_pattern_search(MACROBLOCK *x,
716
                              MV *ref_mv,
717
718
719
720
721
722
                              int search_param,
                              int sad_per_bit,
                              int do_init_search,
                              int do_refine,
                              const vp9_variance_fn_ptr_t *vfp,
                              int use_mvcost,
723
                              const MV *center_mv, MV *best_mv,
724
725
726
                              const int num_candidates[MAX_PATTERN_SCALES],
                              const MV candidates[MAX_PATTERN_SCALES]
                                                 [MAX_PATTERN_CANDIDATES]) {
727
  const MACROBLOCKD* const xd = &x->e_mbd;
728
729
730
731
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, j, s, t;
John Koleszar's avatar
John Koleszar committed
732
733
  uint8_t *what = x->plane[0].src.buf;
  int what_stride = x->plane[0].src.stride;
734
  int in_what_stride = xd->plane[0].pre[0].stride;
John Koleszar's avatar
John Koleszar committed
735
  int br, bc;
736
  MV this_mv;
737
738
  int bestsad = INT_MAX;
  int thissad;
739
740
  uint8_t *base_offset;
  uint8_t *this_offset;
John Koleszar's avatar
John Koleszar committed
741
742
743
744
  int k = -1;
  int all_in;
  int best_site = -1;
  int_mv fcenter_mv;
745
746
747
748
  int best_init_s = search_param_to_steps[search_param];
  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

749
750
  fcenter_mv.as_mv.row = center_mv->row >> 3;
  fcenter_mv.as_mv.col = center_mv->col >> 3;
John Koleszar's avatar
John Koleszar committed
751
752

  // adjust ref_mv to make sure it is within MV range
753
754
755
  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  br = ref_mv->row;
  bc = ref_mv->col;
John Koleszar's avatar
John Koleszar committed
756
757

  // Work out the start point for the search
758
  base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
759
  this_offset = base_offset + (br * in_what_stride) + bc;
760
761
  this_mv.row = br;
  this_mv.col = bc;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
762
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff)
763
                + mvsad_err_cost(&this_mv, &fcenter_mv.as_mv,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
764
                                 mvjsadcost, mvsadcost, sad_per_bit);
John Koleszar's avatar
John Koleszar committed
765

766
767
768
769
770
771
772
773
774
775
776
  // Search all possible scales upto the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      best_site = -1;
      CHECK_BOUNDS((1 << t))
      if (all_in) {
        for (i = 0; i < num_candidates[t]; i++) {
777
778
779
780
          this_mv.row = br + candidates[t][i].row;
          this_mv.col = bc + candidates[t][i].col;
          this_offset = base_offset + (this_mv.row * in_what_stride) +
                                       this_mv.col;
781
782
783
784
785
786
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
787
788
          this_mv.row = br + candidates[t][i].row;
          this_mv.col = bc + candidates[t][i].col;
789
          CHECK_POINT
790
791
          this_offset = base_offset + (this_mv.row * in_what_stride) +
                                       this_mv.col;
792
793
794
795
796
797
798
799
800
801
802
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
John Koleszar's avatar
John Koleszar committed
803
    }
804
805
806
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
John Koleszar's avatar
John Koleszar committed
807
808
809
    }
  }

810
811
812
813
  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    s = best_init_s;
John Koleszar's avatar
John Koleszar committed
814
    best_site = -1;
815
816
817
818
819
820
    do {
      // No need to search all 6 points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
        CHECK_BOUNDS((1 << s))
        if (all_in) {
          for (i = 0; i < num_candidates[s]; i++) {
821
822
823
824
            this_mv.row = br + candidates[s][i].row;
            this_mv.col = bc + candidates[s][i].col;
            this_offset = base_offset + (this_mv.row * in_what_stride) +
                                         this_mv.col;
825
826
827
828
829
830
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
831
832
            this_mv.row = br + candidates[s][i].row;
            this_mv.col = bc + candidates[s][i].col;
833
            CHECK_POINT
834
835
            this_offset = base_offset + (this_mv.row * in_what_stride) +
                                         this_mv.col;
836
837
838
839
840
841
842
843
844
845
846
847
848
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
John Koleszar's avatar
John Koleszar committed
849
      }
850

851
852
853
854
855
856
857
858
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        CHECK_BOUNDS((1 << s))

        get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
        if (all_in) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
859
860
861
862
            this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
            this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
            this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                         this_mv.col;
863
864
865
866
867
868
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
869
870
            this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
            this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
871
            CHECK_POINT
872
873
            this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                         this_mv.col;
874
875
876
877
878
879
880
881
882
883
884
885
886
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    } while (s--);
John Koleszar's avatar
John Koleszar committed
887
  }
888

889
890
891
892
893
894
895
896
897
898
899
  // Check 4 1-away neighbors if do_refine is true.
  // For most well-designed schemes do_refine will not be necessary.
  if (do_refine) {
    static const MV neighbors[4] = {
      {0, -1}, { -1, 0}, {1, 0}, {0, 1},
    };
    for (j = 0; j < 16; j++) {
      best_site = -1;
      CHECK_BOUNDS(1)
      if (all_in) {
        for (i = 0; i < 4; i++) {
900
901
902
903
          this_mv.row = br + neighbors[i].row;
          this_mv.col = bc + neighbors[i].col;
          this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                       this_mv.col;
904
905
906
907
908
909
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < 4; i++) {
910
911
          this_mv.row = br + neighbors[i].row;
          this_mv.col = bc + neighbors[i].col;
912
          CHECK_POINT
913
914
          this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                       this_mv.col;
915
916
917
918
919
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
          }
Yunqing Wang's avatar
Yunqing Wang committed
920

921
922
923
924
925
926
      if (best_site == -1) {
        break;
      } else {
        br += neighbors[best_site].row;
        bc += neighbors[best_site].col;
      }
John Koleszar's avatar
John Koleszar committed
927
    }
John Koleszar's avatar
John Koleszar committed
928
  }
John Koleszar's avatar
John Koleszar committed
929

930
931
  best_mv->row = br;
  best_mv->col = bc;
John Koleszar's avatar
John Koleszar committed
932

933
934
935
936
  this_offset = base_offset + (best_mv->row * in_what_stride) +
                               best_mv->col;
  this_mv.row = best_mv->row * 8;
  this_mv.col = best_mv->col * 8;
937
938
  if (bestsad == INT_MAX)
    return INT_MAX;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
939
940
941

  return vfp->vf(what, what_stride, this_offset, in_what_stride,
                 (unsigned int *)&bestsad) +
942
         use_mvcost ? mv_err_cost(&this_mv, center_mv,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
943
944
                                  x->nmvjointcost, x->mvcost, x->errorperbit)
                    : 0;
945
946
947
948
}


int vp9_hex_search(MACROBLOCK *x,
949
                   MV *ref_mv,
950
951
952
953
954
                   int search_param,
                   int sad_per_bit,
                   int do_init_search,
                   const vp9_variance_fn_ptr_t *vfp,
                   int use_mvcost,
955
                   const MV *center_mv, MV *best_mv) {
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
  // First scale has 8-closest points, the rest have 6 points in hex shape
  // at increasing scales
  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
      { -1024, 0}},
  };
  return
      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                         do_init_search, 0, vfp, use_mvcost,
                         center_mv, best_mv,
                         hex_num_candidates, hex_candidates);
John Koleszar's avatar
John Koleszar committed
981
}
982
983

int vp9_bigdia_search(MACROBLOCK *x,
984
                      MV *ref_mv,
985
986
987
988
989
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
990
991
                      const MV *center_mv,
                      MV *best_mv) {
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
  // First scale has 4-closest points, the rest have 8 points in diamond
  // shape at increasing scales
  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
      {-16, 16}, {-32, 0}},
    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
      {-32, 32}, {-64, 0}},
    {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
      {-64, 64}, {-128, 0}},
    {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
      {-128, 128}, {-256, 0}},
    {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
      {-256, 256}, {-512, 0}},
    {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
      {-512, 512}, {-1024, 0}},
  };
1018
1019
1020
1021
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, 0, vfp, use_mvcost,
                            center_mv, best_mv,
                            bigdia_num_candidates, bigdia_candidates);
1022
1023
1024
}

int vp9_square_search(MACROBLOCK *x,
1025
                      MV *ref_mv,
1026
1027
1028
1029
1030
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
1031
1032
                      const MV *center_mv,
                      MV *best_mv) {
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
  // All scales have 8 closest points in square shape
  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV square_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
    {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
    {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
    {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
    {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
      {-16, 16}, {-16, 0}},
    {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
      {-32, 32}, {-32, 0}},
    {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
      {-64, 64}, {-64, 0}},
    {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
      {-128, 128}, {-128, 0}},
    {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
      {-256, 256}, {-256, 0}},
    {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
      {-512, 512}, {-512, 0}},
    {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
      {0, 1024}, {-1024, 1024}, {-1024, 0}},
  };
1059
1060
1061
1062
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                            do_init_search, 0, vfp, use_mvcost,
                            center_mv, best_mv,
                            square_num_candidates, square_candidates);
1063
1064
};

Yunqing Wang's avatar
Yunqing Wang committed
1065
1066
#undef CHECK_BOUNDS
#undef CHECK_POINT
John Koleszar's avatar
John Koleszar committed
1067
#undef CHECK_BETTER
1068

John Koleszar's avatar
John Koleszar committed
1069
int vp9_diamond_search_sad_c(MACROBLOCK *x,
Jim Bankoski's avatar
Jim Bankoski committed
1070
1071
                             int_mv *ref_mv, int_mv *best_mv,
                             int search_param, int sad_per_bit, int *num00,
1072
1073
                             vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
                             int *mvcost[2], int_mv *center_mv) {
John Koleszar's avatar
John Koleszar committed
1074
1075
  int i, j, step;

1076
  const MACROBLOCKD* const xd = &x->e_mbd;
John Koleszar's avatar
John Koleszar committed
1077
1078
  uint8_t *what = x->plane[0].src.buf;
  int what_stride = x->plane[0].src.stride;
1079
  uint8_t *in_what;
1080
  int in_what_stride = xd->plane[0].pre[0].stride;
1081
  uint8_t *best_address;
John Koleszar's avatar
John Koleszar committed
1082
1083
1084
1085
1086
1087
1088
1089

  int tot_steps;
  int_mv this_mv;

  int bestsad = INT_MAX;
  int best_site = 0;
  int last_site = 0;

1090
1091
  int ref_row, ref_col;
  int this_row_offset, this_col_offset;
John Koleszar's avatar
John Koleszar committed
1092
1093
  search_site *ss;

1094
  uint8_t *check_here;
John Koleszar's avatar
John Koleszar committed
1095
1096
  int thissad;
  int_mv fcenter_mv;
1097

1098
1099
1100
  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

John Koleszar's avatar
John Koleszar committed
1101
1102
1103
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

1104
1105
  clamp_mv(&ref_mv->as_mv,
           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
John Koleszar's avatar
John Koleszar committed
1106
1107
1108
1109
1110
1111
1112
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  // Work out the start point for the search