vp9_mcomp.c 71.4 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
 */

Dmitry Kovalev's avatar
Dmitry Kovalev committed
11
12
#include <limits.h>
#include <math.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
13
#include <stdio.h>
John Koleszar's avatar
John Koleszar committed
14

15
#include "./vpx_config.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
16
17
18

#include "vpx_mem/vpx_mem.h"

19
#include "vp9/common/vp9_findnearmv.h"
Ronald S. Bultje's avatar
Ronald S. Bultje committed
20
#include "vp9/common/vp9_common.h"
John Koleszar's avatar
John Koleszar committed
21

Dmitry Kovalev's avatar
Dmitry Kovalev committed
22
23
24
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_mcomp.h"

25
26
// #define NEW_DIAMOND_SEARCH

27
28
29
30
31
32
33
34
void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) {
  const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
35
36
37
38
39
40
41
42
43
44
  if (x->mv_col_min < col_min)
    x->mv_col_min = col_min;
  if (x->mv_col_max > col_max)
    x->mv_col_max = col_max;
  if (x->mv_row_min < row_min)
    x->mv_row_min = row_min;
  if (x->mv_row_max > row_max)
    x->mv_row_max = row_max;
}

45
int vp9_init_search_range(VP9_COMP *cpi, int size) {
46
47
  int sr = 0;

Paul Wilkins's avatar
Paul Wilkins committed
48
49
50
  // Minimum search size no matter what the passed in value.
  size = MAX(16, size);

51
  while ((size << sr) < MAX_FULL_PEL_VAL)
52
53
54
55
56
    sr++;

  if (sr)
    sr--;

57
58
  sr += cpi->sf.reduce_first_step_size;
  sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
59
60
61
  return sr;
}

62
int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
63
                    int weight) {
64
  MV v;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
65
66
  v.row = mv->as_mv.row - ref->as_mv.row;
  v.col = mv->as_mv.col - ref->as_mv.col;
67
68
69
  return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
                             mvcost[0][v.row] +
                             mvcost[1][v.col]) * weight, 7);
John Koleszar's avatar
John Koleszar committed
70
71
}

72
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
73
                       int error_per_bit) {
74
75
  if (mvcost) {
    MV v;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
76
77
    v.row = mv->as_mv.row - ref->as_mv.row;
    v.col = mv->as_mv.col - ref->as_mv.col;
78
79
80
    return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
                               mvcost[0][v.row] +
                               mvcost[1][v.col]) * error_per_bit, 13);
81
  }
John Koleszar's avatar
John Koleszar committed
82
  return 0;
83
84
}

85
86
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
                          int *mvsadcost[2], int error_per_bit) {
87
88
  if (mvsadcost) {
    MV v;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
89
90
    v.row = mv->as_mv.row - ref->as_mv.row;
    v.col = mv->as_mv.col - ref->as_mv.col;
91
92
93
    return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] +
                               mvsadcost[0][v.row] +
                               mvsadcost[1][v.col]) * error_per_bit, 8);
94
  }
John Koleszar's avatar
John Koleszar committed
95
  return 0;
John Koleszar's avatar
John Koleszar committed
96
97
}

98
void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
99
  int len;
John Koleszar's avatar
John Koleszar committed
100
  int search_site_count = 0;
John Koleszar's avatar
John Koleszar committed
101

John Koleszar's avatar
John Koleszar committed
102
103
104
105
106
107
  // Generate offsets for 4 search sites per step.
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
108
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
John Koleszar's avatar
John Koleszar committed
109
110
    // Compute offsets for search sites.
    x->ss[search_site_count].mv.col = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
111
112
    x->ss[search_site_count].mv.row = -len;
    x->ss[search_site_count].offset = -len * stride;
John Koleszar's avatar
John Koleszar committed
113
114
115
    search_site_count++;

    // Compute offsets for search sites.
John Koleszar's avatar
John Koleszar committed
116
    x->ss[search_site_count].mv.col = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
117
118
    x->ss[search_site_count].mv.row = len;
    x->ss[search_site_count].offset = len * stride;
John Koleszar's avatar
John Koleszar committed
119
120
121
    search_site_count++;

    // Compute offsets for search sites.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
122
    x->ss[search_site_count].mv.col = -len;
John Koleszar's avatar
John Koleszar committed
123
    x->ss[search_site_count].mv.row = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
124
    x->ss[search_site_count].offset = -len;
John Koleszar's avatar
John Koleszar committed
125
126
    search_site_count++;

John Koleszar's avatar
John Koleszar committed
127
    // Compute offsets for search sites.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
128
    x->ss[search_site_count].mv.col = len;
John Koleszar's avatar
John Koleszar committed
129
    x->ss[search_site_count].mv.row = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
130
    x->ss[search_site_count].offset = len;
John Koleszar's avatar
John Koleszar committed
131
132
133
134
135
    search_site_count++;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 4;
John Koleszar's avatar
John Koleszar committed
136
137
}

138
void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
139
  int len, ss_count = 1;
John Koleszar's avatar
John Koleszar committed
140

141
142
  x->ss[0].mv.col = x->ss[0].mv.row = 0;
  x->ss[0].offset = 0;
John Koleszar's avatar
John Koleszar committed
143

Dmitry Kovalev's avatar
Dmitry Kovalev committed
144
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
145
146
147
148
149
150
151
152
153
154
155
    // Generate offsets for 8 search sites per step.
    const MV ss_mvs[8] = {
      { 0,   -len}, {0,    len}, {-len,  0  }, {len,  0  },
      {-len, -len}, {len, -len}, {-len,  len}, {len,  len}
    };
    int i;
    for (i = 0; i < 8; ++i) {
      search_site *const ss = &x->ss[ss_count++];
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
John Koleszar's avatar
John Koleszar committed
156
157
  }

158
  x->ss_count = ss_count;
John Koleszar's avatar
John Koleszar committed
159
  x->searches_per_step = 8;
John Koleszar's avatar
John Koleszar committed
160
161
}

162
163
164
165
166
167
168
169
170
/*
 * To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */
171

172
/* estimated cost of a motion vector (r,c) */
173
174
175
176
#define MVC(r, c)                                       \
    (mvcost ?                                           \
     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
177
178
      error_per_bit + 4096) >> 13 : 0)

179

180
181
#define SP(x) (((x) & 7) << 1)  // convert motion vector component to offset
                                // for svf calc
182

183
184
185
186
187
#define IFMVCV(r, c, s, e)                                \
    if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
      s                                                   \
    else                                                  \
      e;
188
189

/* pointer to predictor base of a motionvector */
190
#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
191

192
/* returns subpixel variance error function */
193
#define DIST(r, c) \
John Koleszar's avatar
John Koleszar committed
194
    vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)
195
196
197
198
199
200
201
202
203
204
205
206
207
208

/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
    IFMVCV(r, c, {                                                       \
      thismse = (DIST(r, c));                                            \
      if ((v = MVC(r, c) + thismse) < besterr) {                         \
        besterr = v;                                                     \
        br = r;                                                          \
        bc = c;                                                          \
        *distortion = thismse;                                           \
        *sse1 = sse;                                                     \
      }                                                                  \
    },                                                                   \
    v = INT_MAX;)
209

210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#define FIRST_LEVEL_CHECKS                              \
  {                                                     \
    unsigned int left, right, up, down, diag;           \
    CHECK_BETTER(left, tr, tc - hstep);                 \
    CHECK_BETTER(right, tr, tc + hstep);                \
    CHECK_BETTER(up, tr - hstep, tc);                   \
    CHECK_BETTER(down, tr + hstep, tc);                 \
    whichdir = (left < right ? 0 : 1) +                 \
               (up < down ? 0 : 2);                     \
    switch (whichdir) {                                 \
      case 0:                                           \
        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
        break;                                          \
      case 1:                                           \
        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
        break;                                          \
      case 2:                                           \
        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
        break;                                          \
      case 3:                                           \
        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
        break;                                          \
    }                                                   \
  }

#define SECOND_LEVEL_CHECKS                             \
  {                                                     \
    int kr, kc;                                         \
    unsigned int second;                                \
    if (tr != br && tc != bc) {                         \
      kr = br - tr;                                     \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
    } else if (tr == br && tc != bc) {                  \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 1:                                         \
          CHECK_BETTER(second, tr + hstep, tc + kc);    \
          break;                                        \
        case 2:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr - hstep, tc + kc);    \
          break;                                        \
      }                                                 \
    } else if (tr != br && tc == bc) {                  \
      kr = br - tr;                                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 2:                                         \
          CHECK_BETTER(second, tr + kr, tc + hstep);    \
          break;                                        \
        case 1:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr + kr, tc - hstep);    \
          break;                                        \
      }                                                 \
    }                                                   \
  }

275
276
277
278
279
280
281
282
283
int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
                                      int_mv *bestmv, int_mv *ref_mv,
                                      int error_per_bit,
                                      const vp9_variance_fn_ptr_t *vfp,
                                      int forced_stop,
                                      int iters_per_step,
                                      int *mvjcost, int *mvcost[2],
                                      int *distortion,
                                      unsigned int *sse1) {
John Koleszar's avatar
John Koleszar committed
284
285
  uint8_t *z = x->plane[0].src.buf;
  int src_stride = x->plane[0].src.stride;
John Koleszar's avatar
John Koleszar committed
286
287
288
289
290
  MACROBLOCKD *xd = &x->e_mbd;

  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
291
292
293
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;
John Koleszar's avatar
John Koleszar committed
294
  int thismse;
John Koleszar's avatar
John Koleszar committed
295

Dmitry Kovalev's avatar
Dmitry Kovalev committed
296
  const int y_stride = xd->plane[0].pre[0].stride;
297
298
  const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
  uint8_t *y = xd->plane[0].pre[0].buf + offset;
299

Dmitry Kovalev's avatar
Dmitry Kovalev committed
300
301
  int rr = ref_mv->as_mv.row;
  int rc = ref_mv->as_mv.col;
Yaowu Xu's avatar
Yaowu Xu committed
302
303
  int br = bestmv->as_mv.row * 8;
  int bc = bestmv->as_mv.col * 8;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
304
  int hstep = 4;
Yaowu Xu's avatar
Yaowu Xu committed
305
306
307
308
  const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
John Koleszar's avatar
John Koleszar committed
309

Dmitry Kovalev's avatar
Dmitry Kovalev committed
310
311
  int tr = br;
  int tc = bc;
John Koleszar's avatar
John Koleszar committed
312
313
314
315
316
317

  // central mv
  bestmv->as_mv.row <<= 3;
  bestmv->as_mv.col <<= 3;

  // calculate central point error
John Koleszar's avatar
John Koleszar committed
318
  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
John Koleszar's avatar
John Koleszar committed
319
  *distortion = besterr;
320
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
John Koleszar's avatar
John Koleszar committed
321

322
323
  // TODO: Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
324
  while (halfiters--) {
John Koleszar's avatar
John Koleszar committed
325
    // 1/2 pel
326
    FIRST_LEVEL_CHECKS;
John Koleszar's avatar
John Koleszar committed
327
328
329
330
331
332
    // no reason to check the same one again.
    if (tr == br && tc == bc)
      break;
    tr = br;
    tc = bc;
  }
John Koleszar's avatar
John Koleszar committed
333

334
335
  // TODO: Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel
John Koleszar's avatar
John Koleszar committed
336

337
338
339
340
  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    while (quarteriters--) {
341
      FIRST_LEVEL_CHECKS;
342
343
344
345
346
347
      // no reason to check the same one again.
      if (tr == br && tc == bc)
        break;
      tr = br;
      tc = bc;
    }
John Koleszar's avatar
John Koleszar committed
348
  }
John Koleszar's avatar
John Koleszar committed
349

350
351
  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
      forced_stop == 0) {
352
    hstep >>= 1;
353
    while (eighthiters--) {
354
      FIRST_LEVEL_CHECKS;
John Koleszar's avatar
John Koleszar committed
355
356
357
358
359
      // no reason to check the same one again.
      if (tr == br && tc == bc)
        break;
      tr = br;
      tc = bc;
360
    }
John Koleszar's avatar
John Koleszar committed
361
  }
362

363
364
  bestmv->as_mv.row = br;
  bestmv->as_mv.col = bc;
John Koleszar's avatar
John Koleszar committed
365

John Koleszar's avatar
John Koleszar committed
366
367
368
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;
John Koleszar's avatar
John Koleszar committed
369

John Koleszar's avatar
John Koleszar committed
370
  return besterr;
John Koleszar's avatar
John Koleszar committed
371
}
372

373
374
375
376
377
378
379
380
381
382
int vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp9_variance_fn_ptr_t *vfp,
                                 int forced_stop,
                                 int iters_per_step,
                                 int *mvjcost, int *mvcost[2],
                                 int *distortion,
                                 unsigned int *sse1) {
  uint8_t *z = x->plane[0].src.buf;
383
  const int src_stride = x->plane[0].src.stride;
384
385
386
387
388
389
390
391
392
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;

393
394
395
  const int y_stride = xd->plane[0].pre[0].stride;
  const int offset = bestmv->as_mv.row * y_stride + bestmv->as_mv.col;
  uint8_t *y = xd->plane[0].pre[0].buf + offset;
396

397
398
  int rr = ref_mv->as_mv.row;
  int rc = ref_mv->as_mv.col;
Yaowu Xu's avatar
Yaowu Xu committed
399
400
  int br = bestmv->as_mv.row * 8;
  int bc = bestmv->as_mv.col * 8;
401
  int hstep = 4;
Yaowu Xu's avatar
Yaowu Xu committed
402
403
404
405
  const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
406

407
408
  int tr = br;
  int tc = bc;
409
410

  // central mv
Yaowu Xu's avatar
Yaowu Xu committed
411
412
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458

  // calculate central point error
  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);

  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  tr = br;
  tc = bc;

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
      forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  bestmv->as_mv.row = br;
  bestmv->as_mv.col = bc;

  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

459
460
461
462
463
464
#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
    vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), \
              z, src_stride, &sse, second_pred)

465
466
467
468
469
470
471
472
473
474
475
int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
                                           int_mv *bestmv, int_mv *ref_mv,
                                           int error_per_bit,
                                           const vp9_variance_fn_ptr_t *vfp,
                                           int forced_stop,
                                           int iters_per_step,
                                           int *mvjcost, int *mvcost[2],
                                           int *distortion,
                                           unsigned int *sse1,
                                           const uint8_t *second_pred,
                                           int w, int h) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
476
477
478
  uint8_t *const z = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  MACROBLOCKD *const xd = &x->e_mbd;
479
480
481
482

  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
483
484
485
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;
486
487
  int thismse;

488
  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
489
  const int y_stride = xd->plane[0].pre[0].stride;
490
491
  const int offset = bestmv->as_mv.row * y_stride + bestmv->as_mv.col;
  uint8_t *const y = xd->plane[0].pre[0].buf + offset;
492

Dmitry Kovalev's avatar
Dmitry Kovalev committed
493
494
  int rr = ref_mv->as_mv.row;
  int rc = ref_mv->as_mv.col;
Yaowu Xu's avatar
Yaowu Xu committed
495
496
  int br = bestmv->as_mv.row * 8;
  int bc = bestmv->as_mv.col * 8;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
497
  int hstep = 4;
Yaowu Xu's avatar
Yaowu Xu committed
498
499
500
501
  const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
502

Dmitry Kovalev's avatar
Dmitry Kovalev committed
503
504
  int tr = br;
  int tc = bc;
505
506

  // central mv
Yaowu Xu's avatar
Yaowu Xu committed
507
508
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
509
510
511
512
513
514
515

  // calculate central point error
  // TODO(yunqingwang): central pointer error was already calculated in full-
  // pixel search, and can be passed in this function.
  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
  *distortion = besterr;
516
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
517
518
519

  // Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
520
  while (halfiters--) {
521
    // 1/2 pel
522
    FIRST_LEVEL_CHECKS;
523
524
525
526
527
528
529
530
531
532
    // no reason to check the same one again.
    if (tr == br && tc == bc)
      break;
    tr = br;
    tc = bc;
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

533
534
535
536
  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    while (quarteriters--) {
537
      FIRST_LEVEL_CHECKS;
538
539
540
541
542
543
      // no reason to check the same one again.
      if (tr == br && tc == bc)
        break;
      tr = br;
      tc = bc;
    }
544
545
  }

546
547
  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
      forced_stop == 0) {
548
    hstep >>= 1;
549
    while (eighthiters--) {
550
      FIRST_LEVEL_CHECKS;
551
552
553
554
555
556
557
558
559
560
      // no reason to check the same one again.
      if (tr == br && tc == bc)
        break;
      tr = br;
      tc = bc;
    }
  }
  bestmv->as_mv.row = br;
  bestmv->as_mv.col = bc;

561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
                                      int_mv *bestmv, int_mv *ref_mv,
                                      int error_per_bit,
                                      const vp9_variance_fn_ptr_t *vfp,
                                      int forced_stop,
                                      int iters_per_step,
                                      int *mvjcost, int *mvcost[2],
                                      int *distortion,
                                      unsigned int *sse1,
                                      const uint8_t *second_pred,
                                      int w, int h) {
  uint8_t *z = x->plane[0].src.buf;
580
  const int src_stride = x->plane[0].src.stride;
581
582
583
584
585
586
587
588
589
590
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  unsigned int halfiters = iters_per_step;
  unsigned int quarteriters = iters_per_step;
  unsigned int eighthiters = iters_per_step;

  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
591
592
593
  const int y_stride = xd->plane[0].pre[0].stride;
  const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
  uint8_t *y = xd->plane[0].pre[0].buf + offset;
594

595
596
  int rr = ref_mv->as_mv.row;
  int rc = ref_mv->as_mv.col;
Yaowu Xu's avatar
Yaowu Xu committed
597
598
  int br = bestmv->as_mv.row * 8;
  int bc = bestmv->as_mv.col * 8;
599
  int hstep = 4;
Yaowu Xu's avatar
Yaowu Xu committed
600
601
602
603
  const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX);
604

605
606
  int tr = br;
  int tc = bc;
607
608

  // central mv
Yaowu Xu's avatar
Yaowu Xu committed
609
610
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656

  // calculate central point error
  // TODO(yunqingwang): central pointer error was already calculated in full-
  // pixel search, and can be passed in this function.
  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);

  // Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
      forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  bestmv->as_mv.row = br;
  bestmv->as_mv.col = bc;

657
658
659
660
661
662
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}
663

John Koleszar's avatar
John Koleszar committed
664
665
666
#undef MVC
#undef PRE
#undef DIST
667
#undef IFMVCV
John Koleszar's avatar
John Koleszar committed
668
#undef CHECK_BETTER
669
670
#undef SP

Yunqing Wang's avatar
Yunqing Wang committed
671
#define CHECK_BOUNDS(range) \
John Koleszar's avatar
John Koleszar committed
672
  {\
Yunqing Wang's avatar
Yunqing Wang committed
673
674
675
676
677
    all_in = 1;\
    all_in &= ((br-range) >= x->mv_row_min);\
    all_in &= ((br+range) <= x->mv_row_max);\
    all_in &= ((bc-range) >= x->mv_col_min);\
    all_in &= ((bc+range) <= x->mv_col_max);\
John Koleszar's avatar
John Koleszar committed
678
  }
Yunqing Wang's avatar
Yunqing Wang committed
679
680

#define CHECK_POINT \
John Koleszar's avatar
John Koleszar committed
681
  {\
Yunqing Wang's avatar
Yunqing Wang committed
682
683
684
685
    if (this_mv.as_mv.col < x->mv_col_min) continue;\
    if (this_mv.as_mv.col > x->mv_col_max) continue;\
    if (this_mv.as_mv.row < x->mv_row_min) continue;\
    if (this_mv.as_mv.row > x->mv_row_max) continue;\
John Koleszar's avatar
John Koleszar committed
686
  }
Yunqing Wang's avatar
Yunqing Wang committed
687
688

#define CHECK_BETTER \
John Koleszar's avatar
John Koleszar committed
689
  {\
Yunqing Wang's avatar
Yunqing Wang committed
690
691
    if (thissad < bestsad)\
    {\
692
693
694
695
      if (use_mvcost) \
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
                                  mvjsadcost, mvsadcost, \
                                  sad_per_bit);\
John Koleszar's avatar
John Koleszar committed
696
697
698
699
700
      if (thissad < bestsad)\
      {\
        bestsad = thissad;\
        best_site = i;\
      }\
Yunqing Wang's avatar
Yunqing Wang committed
701
    }\
John Koleszar's avatar
John Koleszar committed
702
703
  }

704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
#define get_next_chkpts(list, i, n)   \
    list[0] = ((i) == 0 ? (n) - 1 : (i) - 1);  \
    list[1] = (i);                             \
    list[2] = ((i) == (n) - 1 ? 0 : (i) + 1);

#define MAX_PATTERN_SCALES         11
#define MAX_PATTERN_CANDIDATES      8  // max number of canddiates per scale
#define PATTERN_CANDIDATES_REF      3  // number of refinement candidates

// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
static int vp9_pattern_search(MACROBLOCK *x,
                              int_mv *ref_mv,
                              int search_param,
                              int sad_per_bit,
                              int do_init_search,
                              int do_refine,
                              const vp9_variance_fn_ptr_t *vfp,
                              int use_mvcost,
                              int_mv *center_mv, int_mv *best_mv,
                              const int num_candidates[MAX_PATTERN_SCALES],
                              const MV candidates[MAX_PATTERN_SCALES]
                                                 [MAX_PATTERN_CANDIDATES]) {
729
  const MACROBLOCKD* const xd = &x->e_mbd;
730
731
732
733
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, j, s, t;
John Koleszar's avatar
John Koleszar committed
734
735
  uint8_t *what = x->plane[0].src.buf;
  int what_stride = x->plane[0].src.stride;
736
  int in_what_stride = xd->plane[0].pre[0].stride;
John Koleszar's avatar
John Koleszar committed
737
738
  int br, bc;
  int_mv this_mv;
739
740
  int bestsad = INT_MAX;
  int thissad;
741
742
  uint8_t *base_offset;
  uint8_t *this_offset;
John Koleszar's avatar
John Koleszar committed
743
744
745
746
  int k = -1;
  int all_in;
  int best_site = -1;
  int_mv fcenter_mv;
747
748
749
750
  int best_init_s = search_param_to_steps[search_param];
  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

John Koleszar's avatar
John Koleszar committed
751
752
753
754
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  // adjust ref_mv to make sure it is within MV range
755
756
  clamp_mv(&ref_mv->as_mv,
           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
John Koleszar's avatar
John Koleszar committed
757
758
759
760
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  // Work out the start point for the search
761
  base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
762
  this_offset = base_offset + (br * in_what_stride) + bc;
John Koleszar's avatar
John Koleszar committed
763
764
765
766
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(what, what_stride, this_offset,
                     in_what_stride, 0x7fffffff)
767
768
            + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
                             sad_per_bit);
John Koleszar's avatar
John Koleszar committed
769

770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
  // Search all possible scales upto the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      best_site = -1;
      CHECK_BOUNDS((1 << t))
      if (all_in) {
        for (i = 0; i < num_candidates[t]; i++) {
          this_mv.as_mv.row = br + candidates[t][i].row;
          this_mv.as_mv.col = bc + candidates[t][i].col;
          this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
              this_mv.as_mv.col;
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
          this_mv.as_mv.row = br + candidates[t][i].row;
          this_mv.as_mv.col = bc + candidates[t][i].col;
          CHECK_POINT
          this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                        this_mv.as_mv.col;
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
John Koleszar's avatar
John Koleszar committed
807
    }
808
809
810
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
John Koleszar's avatar
John Koleszar committed
811
812
813
    }
  }

814
815
816
817
  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    s = best_init_s;
John Koleszar's avatar
John Koleszar committed
818
    best_site = -1;
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
    do {
      // No need to search all 6 points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
        CHECK_BOUNDS((1 << s))
        if (all_in) {
          for (i = 0; i < num_candidates[s]; i++) {
            this_mv.as_mv.row = br + candidates[s][i].row;
            this_mv.as_mv.col = bc + candidates[s][i].col;
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            this_mv.as_mv.row = br + candidates[s][i].row;
            this_mv.as_mv.col = bc + candidates[s][i].col;
            CHECK_POINT
            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                          this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
John Koleszar's avatar
John Koleszar committed
853
      }
854

855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        CHECK_BOUNDS((1 << s))

        get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
        if (all_in) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            this_mv.as_mv.row = br +
                candidates[s][next_chkpts_indices[i]].row;
            this_mv.as_mv.col = bc +
                candidates[s][next_chkpts_indices[i]].col;
            this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            this_mv.as_mv.row = br +
                candidates[s][next_chkpts_indices[i]].row;
            this_mv.as_mv.col = bc +
                candidates[s][next_chkpts_indices[i]].col;
            CHECK_POINT
            this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                          this_mv.as_mv.col;
            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                               bestsad);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    } while (s--);
John Koleszar's avatar
John Koleszar committed
895
  }
896

897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
  // Check 4 1-away neighbors if do_refine is true.
  // For most well-designed schemes do_refine will not be necessary.
  if (do_refine) {
    static const MV neighbors[4] = {
      {0, -1}, { -1, 0}, {1, 0}, {0, 1},
    };
    for (j = 0; j < 16; j++) {
      best_site = -1;
      CHECK_BOUNDS(1)
      if (all_in) {
        for (i = 0; i < 4; i++) {
          this_mv.as_mv.row = br + neighbors[i].row;
          this_mv.as_mv.col = bc + neighbors[i].col;
          this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
              this_mv.as_mv.col;
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < 4; i++) {
          this_mv.as_mv.row = br + neighbors[i].row;
          this_mv.as_mv.col = bc + neighbors[i].col;
          CHECK_POINT
          this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                        this_mv.as_mv.col;
          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
                             bestsad);
          CHECK_BETTER
        }
          }
Yunqing Wang's avatar
Yunqing Wang committed
928

929
930
931
932
933
934
      if (best_site == -1) {
        break;
      } else {
        br += neighbors[best_site].row;
        bc += neighbors[best_site].col;
      }
John Koleszar's avatar
John Koleszar committed
935
    }
John Koleszar's avatar
John Koleszar committed
936
  }
John Koleszar's avatar
John Koleszar committed
937

John Koleszar's avatar
John Koleszar committed
938
939
  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;
John Koleszar's avatar
John Koleszar committed
940

941
942
  this_offset = base_offset + (best_mv->as_mv.row * (in_what_stride)) +
      best_mv->as_mv.col;
Yaowu Xu's avatar
Yaowu Xu committed
943
944
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
  if (bestsad == INT_MAX)
    return INT_MAX;
  return
      vfp->vf(what, what_stride, this_offset, in_what_stride,
              (unsigned int *)(&bestsad)) +
      use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, x->mvcost,
                               x->errorperbit) : 0;
}


int vp9_hex_search(MACROBLOCK *x,
                   int_mv *ref_mv,
                   int search_param,
                   int sad_per_bit,
                   int do_init_search,
                   const vp9_variance_fn_ptr_t *vfp,
                   int use_mvcost,
                   int_mv *center_mv, int_mv *best_mv) {
  // First scale has 8-closest points, the rest have 6 points in hex shape
  // at increasing scales
  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
      { -1024, 0}},
  };
  return
      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                         do_init_search, 0, vfp, use_mvcost,
                         center_mv, best_mv,
                         hex_num_candidates, hex_candidates);
John Koleszar's avatar
John Koleszar committed
988
}
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073

int vp9_bigdia_search(MACROBLOCK *x,
                      int_mv *ref_mv,
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
                      int_mv *center_mv,
                      int_mv *best_mv) {
  // First scale has 4-closest points, the rest have 8 points in diamond
  // shape at increasing scales
  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
      {-16, 16}, {-32, 0}},
    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
      {-32, 32}, {-64, 0}},
    {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
      {-64, 64}, {-128, 0}},
    {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
      {-128, 128}, {-256, 0}},
    {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
      {-256, 256}, {-512, 0}},
    {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
      {-512, 512}, {-1024, 0}},
  };
  return
      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                         do_init_search, 0, vfp, use_mvcost,
                         center_mv, best_mv,
                         bigdia_num_candidates, bigdia_candidates);
}

int vp9_square_search(MACROBLOCK *x,
                      int_mv *ref_mv,
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
                      int_mv *center_mv,
                      int_mv *best_mv) {
  // All scales have 8 closest points in square shape
  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV square_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
    {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
    {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
    {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
    {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
      {-16, 16}, {-16, 0}},
    {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
      {-32, 32}, {-32, 0}},
    {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
      {-64, 64}, {-64, 0}},
    {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
      {-128, 128}, {-128, 0}},
    {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
      {-256, 256}, {-256, 0}},
    {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
      {-512, 512}, {-512, 0}},
    {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
      {0, 1024}, {-1024, 1024}, {-1024, 0}},
  };
  return
      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
                         do_init_search, 0, vfp, use_mvcost,
                         center_mv, best_mv,
                         square_num_candidates, square_candidates);
};

Yunqing Wang's avatar
Yunqing Wang committed
1074
1075
#undef CHECK_BOUNDS
#undef CHECK_POINT
John Koleszar's avatar
John Koleszar committed
1076
#undef CHECK_BETTER
1077

John Koleszar's avatar
John Koleszar committed
1078
int vp9_diamond_search_sad_c(MACROBLOCK *x,
Jim Bankoski's avatar
Jim Bankoski committed
1079
1080
                             int_mv *ref_mv, int_mv *best_mv,
                             int search_param, int sad_per_bit, int *num00,
1081
1082
                             vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
                             int *mvcost[2], int_mv *center_mv) {
John Koleszar's avatar
John Koleszar committed
1083
1084
  int i, j, step;

1085
  const MACROBLOCKD* const xd = &x->e_mbd;
John Koleszar's avatar
John Koleszar committed
1086
1087
  uint8_t *what = x->plane[0].src.buf;
  int what_stride = x->plane[0].src.stride;
1088
  uint8_t *in_what;
1089
  int in_what_stride = xd->plane[0].pre[0].stride;
1090
  uint8_t *best_address;
John Koleszar's avatar
John Koleszar committed
1091
1092
1093
1094
1095
1096
1097
1098

  int tot_steps;
  int_mv this_mv;

  int bestsad = INT_MAX;
  int best_site = 0;
  int last_site = 0;

1099
1100
  int ref_row, ref_col;
  int this_row_offset, this_col_offset;
John Koleszar's avatar
John Koleszar committed
1101
1102
  search_site *ss;

1103
  uint8_t *check_here;
John Koleszar's avatar
John Koleszar committed
1104
1105
  int thissad;
  int_mv fcenter_mv;
1106

1107
1108
1109
  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

John Koleszar's avatar
John Koleszar committed
1110
1111
1112
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

1113
1114
  clamp_mv(&ref_mv->as_mv,
           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
John Koleszar's avatar
John Koleszar committed
1115
1116
1117
1118
1119
1120
1121
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  // Work out the start point for the search
1122
1123
  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
John Koleszar's avatar
John Koleszar committed
1124
1125
1126
1127
1128
  best_address = in_what;

  // Check the starting position
  bestsad = fn_ptr->sdf(what, what_stride, in_what,
                        in_what_stride, 0x7fffffff)
1129
1130
            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
                             sad_per_bit);
John Koleszar's avatar
John Koleszar committed
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144

  // search_param determines the length of the initial step and hence the number of iterations
  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; step++) {
    for (j = 0; j < x->searches_per_step; j++) {
      // Trap illegal vectors
      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

1145
1146
1147
1148
      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
John Koleszar's avatar
John Koleszar committed
1149
        check_here = ss[i].offset + best_address;
1150
1151
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
                              bestsad);
John Koleszar's avatar
John Koleszar committed
1152
1153
1154
1155
1156

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1157
                                    mvjsadcost, mvsadcost, sad_per_bit);
John Koleszar's avatar
John Koleszar committed
1158
1159
1160
1161
1162

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
John Koleszar's avatar
John Koleszar committed
1163
        }
John Koleszar's avatar
John Koleszar committed
1164
      }
John Koleszar's avatar
John Koleszar committed
1165

John Koleszar's avatar
John Koleszar committed
1166
      i++;
John Koleszar's avatar
John Koleszar committed
1167
1168
    }

John Koleszar's avatar
John Koleszar committed
1169
1170
1171
1172
1173
    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
#if defined(NEW_DIAMOND_SEARCH)
      while (1) {
        this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
        this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = ss[best_site].offset + best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
                                bestsad);
          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
                                      mvjsadcost, mvsadcost, sad_per_bit);
            if (thissad < bestsad) {
              bestsad = thissad;
              best_mv->as_mv.row += ss[best_site].mv.row;
              best_mv->as_mv.col += ss[best_site].mv.col;
              best_address += ss[best_site].offset;
              continue;
            }
          }
        }
        break;
      };
#endif
John Koleszar's avatar
John Koleszar committed
1202
1203
1204
    } else if (best_address == in_what)
      (*num00)++;
  }
John Koleszar's avatar
John Koleszar committed
1205

Yaowu Xu's avatar
Yaowu Xu committed
1206
1207
  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;
John Koleszar's avatar
John Koleszar committed
1208

John Koleszar's avatar
John Koleszar committed
1209
1210
1211
  if (bestsad == INT_MAX)
    return INT_MAX;

1212
1213
1214
  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
         (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost,
                                                   mvcost, x->errorperbit);
John Koleszar's avatar
John Koleszar committed
1215
1216
}

John Koleszar's avatar
John Koleszar committed
1217
int vp9_diamond_search_sadx4(MACROBLOCK *x,
1218
1219
                             int_mv *ref_mv, int_mv *best_mv, int search_param,
                             int sad_per_bit, int *num00,
1220
                             vp9_variance_fn_ptr_t *fn_ptr,
1221
                             int *mvjcost, int *mvcost[2], int_mv *center_mv) {
John Koleszar's avatar
John Koleszar committed
1222
1223
  int i, j, step;

1224
  const MACROBLOCKD* const xd = &x->e_mbd;
John Koleszar's avatar
John Koleszar committed
1225
1226
  uint8_t *what = x->plane[0].src.buf;
  int what_stride = x->plane[0].src.stride;
1227
  uint8_t *in_what;
1228
  int in_what_stride = xd->plane[0].pre[0].stride;
1229
  uint8_t *best_address;
John Koleszar's avatar
John Koleszar committed
1230
1231
1232
1233

  int tot_steps;
  int_mv this_mv;

1234
  unsigned int bestsad = INT_MAX;
John Koleszar's avatar
John Koleszar committed
1235
1236
1237
1238
1239
1240
1241
1242
1243
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

1244
  uint8_t *check_here;
John Koleszar's avatar
John Koleszar committed
1245
1246
  unsigned int thissad;
  int_mv fcenter_mv;
1247

1248
1249
1250
  int *mvjsadcost = x->nmvjointsadcost;
  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};

John Koleszar's avatar
John Koleszar committed
1251
1252
1253
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

1254
1255
  clamp_mv(&ref_mv->as_mv,
           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
John Koleszar's avatar
John Koleszar committed
1256
1257
1258
1259
1260
1261
1262
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  // Work out the start point for the search