vp9_mcomp.c 75.7 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
 */

Dmitry Kovalev's avatar
Dmitry Kovalev committed
11
12
#include <limits.h>
#include <math.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
13
#include <stdio.h>
John Koleszar's avatar
John Koleszar committed
14

15
#include "./vpx_config.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
16
17
18

#include "vpx_mem/vpx_mem.h"

Ronald S. Bultje's avatar
Ronald S. Bultje committed
19
#include "vp9/common/vp9_common.h"
John Koleszar's avatar
John Koleszar committed
20

Dmitry Kovalev's avatar
Dmitry Kovalev committed
21
#include "vp9/encoder/vp9_encoder.h"
Dmitry Kovalev's avatar
Dmitry Kovalev committed
22
23
#include "vp9/encoder/vp9_mcomp.h"

24
25
// #define NEW_DIAMOND_SEARCH

26
27
28
29
30
static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
                                             const MV *mv) {
  return &buf->buf[mv->row * buf->stride + mv->col];
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
31
void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
32
33
34
35
36
37
38
39
40
  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;

  col_min = MAX(col_min, (MV_LOW >> 3) + 1);
  row_min = MAX(row_min, (MV_LOW >> 3) + 1);
  col_max = MIN(col_max, (MV_UPP >> 3) - 1);
  row_max = MIN(row_max, (MV_UPP >> 3) - 1);
41
42
43

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
44
45
46
47
48
49
50
51
52
53
  if (x->mv_col_min < col_min)
    x->mv_col_min = col_min;
  if (x->mv_col_max > col_max)
    x->mv_col_max = col_max;
  if (x->mv_row_min < row_min)
    x->mv_row_min = row_min;
  if (x->mv_row_max > row_max)
    x->mv_row_max = row_max;
}

54
int vp9_init_search_range(int size) {
55
  int sr = 0;
Paul Wilkins's avatar
Paul Wilkins committed
56
57
58
  // Minimum search size no matter what the passed in value.
  size = MAX(16, size);

59
  while ((size << sr) < MAX_FULL_PEL_VAL)
60
61
    sr++;

62
  sr = MIN(sr, MAX_MVSEARCH_STEPS - 2);
63
64
65
  return sr;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
66
static INLINE int mv_cost(const MV *mv,
67
                          const int *joint_cost, int *const comp_cost[2]) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
68
69
  return joint_cost[vp9_get_mv_joint(mv)] +
             comp_cost[0][mv->row] + comp_cost[1][mv->col];
John Koleszar's avatar
John Koleszar committed
70
71
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
72
73
74
75
76
77
78
79
80
int vp9_mv_bit_cost(const MV *mv, const MV *ref,
                    const int *mvjcost, int *mvcost[2], int weight) {
  const MV diff = { mv->row - ref->row,
                    mv->col - ref->col };
  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}

static int mv_err_cost(const MV *mv, const MV *ref,
                       const int *mvjcost, int *mvcost[2],
81
                       int error_per_bit) {
82
  if (mvcost) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
83
84
85
86
    const MV diff = { mv->row - ref->row,
                      mv->col - ref->col };
    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
                                  error_per_bit, 13);
87
  }
John Koleszar's avatar
John Koleszar committed
88
  return 0;
89
90
}

91
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
92
                          int error_per_bit) {
93
  if (x->nmvsadcost) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
94
95
    const MV diff = { mv->row - ref->row,
                      mv->col - ref->col };
96
97
    return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
                                      x->nmvsadcost) * error_per_bit, 8);
98
  }
John Koleszar's avatar
John Koleszar committed
99
  return 0;
John Koleszar's avatar
John Koleszar committed
100
101
}

102
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
103
  int len, ss_count = 1;
John Koleszar's avatar
John Koleszar committed
104

105
106
  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
  cfg->ss[0].offset = 0;
John Koleszar's avatar
John Koleszar committed
107

Dmitry Kovalev's avatar
Dmitry Kovalev committed
108
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
109
110
111
112
    // Generate offsets for 4 search sites per step.
    const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
    int i;
    for (i = 0; i < 4; ++i) {
113
      search_site *const ss = &cfg->ss[ss_count++];
114
115
116
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
John Koleszar's avatar
John Koleszar committed
117
118
  }

119
120
  cfg->ss_count = ss_count;
  cfg->searches_per_step = 4;
John Koleszar's avatar
John Koleszar committed
121
122
}

123
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
124
  int len, ss_count = 1;
John Koleszar's avatar
John Koleszar committed
125

126
127
  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
  cfg->ss[0].offset = 0;
John Koleszar's avatar
John Koleszar committed
128

Dmitry Kovalev's avatar
Dmitry Kovalev committed
129
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
130
131
    // Generate offsets for 8 search sites per step.
    const MV ss_mvs[8] = {
132
133
      {-len,  0  }, {len,  0  }, { 0,   -len}, {0,    len},
      {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
134
135
136
    };
    int i;
    for (i = 0; i < 8; ++i) {
137
      search_site *const ss = &cfg->ss[ss_count++];
138
139
140
      ss->mv = ss_mvs[i];
      ss->offset = ss->mv.row * stride + ss->mv.col;
    }
John Koleszar's avatar
John Koleszar committed
141
142
  }

143
144
  cfg->ss_count = ss_count;
  cfg->searches_per_step = 8;
John Koleszar's avatar
John Koleszar committed
145
146
}

147
148
149
150
151
152
153
154
155
/*
 * To avoid the penalty for crossing cache-line read, preload the reference
 * area in a small buffer, which is aligned to make sure there won't be crossing
 * cache-line read while reading from this buffer. This reduced the cpu
 * cycles spent on reading ref data in sub-pixel filter functions.
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
 * could reduce the area.
 */
156

157
/* estimated cost of a motion vector (r,c) */
158
159
160
161
#define MVC(r, c)                                       \
    (mvcost ?                                           \
     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
162
163
      error_per_bit + 4096) >> 13 : 0)

164

165
166
167
168
// convert motion vector component to offset for svf calc
static INLINE int sp(int x) {
  return (x & 7) << 1;
}
169

170
171
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  return &buf[(r >> 3) * stride + (c >> 3)];
172
}
173

174
175
/* checks if (r, c) has better score than previous best */
#define CHECK_BETTER(v, r, c) \
Dmitry Kovalev's avatar
Dmitry Kovalev committed
176
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {              \
177
178
179
180
181
182
    if (second_pred == NULL)                                           \
      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
                             src_stride, &sse);                        \
    else                                                               \
      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
                              z, src_stride, &sse, second_pred);       \
Dmitry Kovalev's avatar
Dmitry Kovalev committed
183
184
185
186
187
188
189
190
191
192
    if ((v = MVC(r, c) + thismse) < besterr) {                         \
      besterr = v;                                                     \
      br = r;                                                          \
      bc = c;                                                          \
      *distortion = thismse;                                           \
      *sse1 = sse;                                                     \
    }                                                                  \
  } else {                                                             \
    v = INT_MAX;                                                       \
  }
193

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#define FIRST_LEVEL_CHECKS                              \
  {                                                     \
    unsigned int left, right, up, down, diag;           \
    CHECK_BETTER(left, tr, tc - hstep);                 \
    CHECK_BETTER(right, tr, tc + hstep);                \
    CHECK_BETTER(up, tr - hstep, tc);                   \
    CHECK_BETTER(down, tr + hstep, tc);                 \
    whichdir = (left < right ? 0 : 1) +                 \
               (up < down ? 0 : 2);                     \
    switch (whichdir) {                                 \
      case 0:                                           \
        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
        break;                                          \
      case 1:                                           \
        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
        break;                                          \
      case 2:                                           \
        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
        break;                                          \
      case 3:                                           \
        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
        break;                                          \
    }                                                   \
  }

#define SECOND_LEVEL_CHECKS                             \
  {                                                     \
    int kr, kc;                                         \
    unsigned int second;                                \
    if (tr != br && tc != bc) {                         \
      kr = br - tr;                                     \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
    } else if (tr == br && tc != bc) {                  \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 1:                                         \
          CHECK_BETTER(second, tr + hstep, tc + kc);    \
          break;                                        \
        case 2:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr - hstep, tc + kc);    \
          break;                                        \
      }                                                 \
    } else if (tr != br && tc == bc) {                  \
      kr = br - tr;                                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 2:                                         \
          CHECK_BETTER(second, tr + kr, tc + hstep);    \
          break;                                        \
        case 1:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr + kr, tc - hstep);    \
          break;                                        \
      }                                                 \
    }                                                   \
  }

259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
#define SETUP_SUBPEL_SEARCH                                                \
  const uint8_t *const z = x->plane[0].src.buf;                            \
  const int src_stride = x->plane[0].src.stride;                           \
  const MACROBLOCKD *xd = &x->e_mbd;                                       \
  unsigned int besterr = INT_MAX;                                          \
  unsigned int sse;                                                        \
  unsigned int whichdir;                                                   \
  int thismse;                                                             \
  const unsigned int halfiters = iters_per_step;                           \
  const unsigned int quarteriters = iters_per_step;                        \
  const unsigned int eighthiters = iters_per_step;                         \
  const int y_stride = xd->plane[0].pre[0].stride;                         \
  const int offset = bestmv->row * y_stride + bestmv->col;                 \
  const uint8_t *const y = xd->plane[0].pre[0].buf;                        \
                                                                           \
  int rr = ref_mv->row;                                                    \
  int rc = ref_mv->col;                                                    \
  int br = bestmv->row * 8;                                                \
  int bc = bestmv->col * 8;                                                \
  int hstep = 4;                                                           \
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);           \
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);           \
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);           \
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);           \
  int tr = br;                                                             \
  int tc = bc;                                                             \
                                                                           \
  bestmv->row *= 8;                                                        \
287
  bestmv->col *= 8;
288

289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
#if CONFIG_VP9_HIGHBITDEPTH
#define SETUP_CENTER_ERROR                                                   \
  if (second_pred != NULL) {                                                 \
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {                       \
      DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);             \
      vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,     \
                             y_stride);                                      \
      besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,   \
                        sse1);                                               \
    } else {                                                                 \
      DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);                \
      vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
      besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);                  \
    }                                                                        \
  } else {                                                                   \
    besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);            \
  }                                                                          \
  *distortion = besterr;                                                     \
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);

#else

#define SETUP_CENTER_ERROR                                                   \
  if (second_pred != NULL) {                                                 \
    DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);                  \
    vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);   \
    besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);                    \
  } else {                                                                   \
    besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);            \
  }                                                                          \
  *distortion = besterr;                                                     \
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
#endif  // CONFIG_VP9_HIGHBITDEPTH




static INLINE int divide_and_round(const int n, const int d) {
  return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
}

330
331
332
333
334
static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  return cost_list[0] < cost_list[1] &&
         cost_list[0] < cost_list[2] &&
         cost_list[0] < cost_list[3] &&
         cost_list[0] < cost_list[4];
335
336
337
338
339
340
341
342
343
344
}

// Returns surface minima estimate at given precision in 1/2^n bits.
// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
// For a given set of costs S0, S1, S2, S3, S4 at points
// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
// the solution for the location of the minima (x0, y0) is given by:
// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
// The code below is an integerized version of that.
345
static void get_cost_surf_min(int *cost_list, int *ir, int *ic,
346
                              int bits) {
347
348
349
350
  *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
                         (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
  *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
                         (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
351
352
}

353
354
355
356
357
358
359
360
361
362
363
364
365
366
int vp9_find_best_sub_pixel_tree_pruned_evenmore(
    const MACROBLOCK *x,
    MV *bestmv, const MV *ref_mv,
    int allow_hp,
    int error_per_bit,
    const vp9_variance_fn_ptr_t *vfp,
    int forced_stop,
    int iters_per_step,
    int *cost_list,
    int *mvjcost, int *mvcost[2],
    int *distortion,
    unsigned int *sse1,
    const uint8_t *second_pred,
    int w, int h) {
367
  SETUP_SUBPEL_SEARCH;
368
369
370
371
372
373
374
375
376
  SETUP_CENTER_ERROR;
  (void) halfiters;
  (void) quarteriters;
  (void) eighthiters;
  (void) whichdir;
  (void) allow_hp;
  (void) forced_stop;
  (void) hstep;

377
378
379
380
381
  if (cost_list &&
      cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX &&
      is_cost_list_wellbehaved(cost_list)) {
382
383
    int ir, ic;
    unsigned int minpt;
384
    get_cost_surf_min(cost_list, &ir, &ic, 2);
385
    if (ir != 0 || ic != 0) {
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    tr = br;
    tc = bc;

    // Each subsequent iteration checks at least one point in common with
    // the last iteration could be 2 ( if diag selected) 1/4 pel
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  }

  tr = br;
  tc = bc;

  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
                                             MV *bestmv, const MV *ref_mv,
                                             int allow_hp,
                                             int error_per_bit,
                                             const vp9_variance_fn_ptr_t *vfp,
                                             int forced_stop,
                                             int iters_per_step,
437
                                             int *cost_list,
438
439
440
441
442
443
444
                                             int *mvjcost, int *mvcost[2],
                                             int *distortion,
                                             unsigned int *sse1,
                                             const uint8_t *second_pred,
                                             int w, int h) {
  SETUP_SUBPEL_SEARCH;
  SETUP_CENTER_ERROR;
445
446
447
448
449
  if (cost_list &&
      cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX &&
      is_cost_list_wellbehaved(cost_list)) {
450
451
    unsigned int minpt;
    int ir, ic;
452
    get_cost_surf_min(cost_list, &ir, &ic, 1);
453
454
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
455
456
    }
  } else {
457
458
459
460
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
461
  }
462

463
464
465
466
467
  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
468
469
    tr = br;
    tc = bc;
470
471
472
473
474
475
476
477
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
478
479
    tr = br;
    tc = bc;
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void) tr;
  (void) tc;

  bestmv->row = br;
  bestmv->col = bc;

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
                                        MV *bestmv, const MV *ref_mv,
                                        int allow_hp,
                                        int error_per_bit,
                                        const vp9_variance_fn_ptr_t *vfp,
                                        int forced_stop,
                                        int iters_per_step,
508
                                        int *cost_list,
509
510
511
512
513
514
515
                                        int *mvjcost, int *mvcost[2],
                                        int *distortion,
                                        unsigned int *sse1,
                                        const uint8_t *second_pred,
                                        int w, int h) {
  SETUP_SUBPEL_SEARCH;
  SETUP_CENTER_ERROR;
516
517
518
519
  if (cost_list &&
      cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX) {
520
    unsigned int left, right, up, down, diag;
521
522
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
523
524
525
    switch (whichdir) {
      case 0:
        CHECK_BETTER(left, tr, tc - hstep);
526
527
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
528
529
530
        break;
      case 1:
        CHECK_BETTER(right, tr, tc + hstep);
531
532
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
533
534
535
        break;
      case 2:
        CHECK_BETTER(left, tr, tc - hstep);
536
537
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
538
539
540
        break;
      case 3:
        CHECK_BETTER(right, tr, tc + hstep);
541
542
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
        break;
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void) tr;
  (void) tc;

  bestmv->row = br;
  bestmv->col = bc;

  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
593
int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
594
                                 MV *bestmv, const MV *ref_mv,
595
                                 int allow_hp,
596
597
598
599
                                 int error_per_bit,
                                 const vp9_variance_fn_ptr_t *vfp,
                                 int forced_stop,
                                 int iters_per_step,
600
                                 int *cost_list,
601
602
                                 int *mvjcost, int *mvcost[2],
                                 int *distortion,
603
604
605
                                 unsigned int *sse1,
                                 const uint8_t *second_pred,
                                 int w, int h) {
606
  SETUP_SUBPEL_SEARCH;
607
  SETUP_CENTER_ERROR;
608
  (void) cost_list;  // to silence compiler warning
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633

  // Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

634
  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
635
636
637
638
639
640
641
642
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
643
644
645
646
647
  // These lines insure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void) tr;
  (void) tc;

648
649
  bestmv->row = br;
  bestmv->col = bc;
650

651
652
  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
653
654
655
656
    return INT_MAX;

  return besterr;
}
657

John Koleszar's avatar
John Koleszar committed
658
659
660
#undef MVC
#undef PRE
#undef CHECK_BETTER
661

662
663
static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
                               int range) {
664
665
666
667
668
  return ((row - range) >= x->mv_row_min) &
         ((row + range) <= x->mv_row_max) &
         ((col - range) >= x->mv_col_min) &
         ((col + range) <= x->mv_col_max);
}
Yunqing Wang's avatar
Yunqing Wang committed
669

Dmitry Kovalev's avatar
Dmitry Kovalev committed
670
671
672
static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
  return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
         (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
673
}
Yunqing Wang's avatar
Yunqing Wang committed
674
675

#define CHECK_BETTER \
John Koleszar's avatar
John Koleszar committed
676
  {\
Dmitry Kovalev's avatar
Dmitry Kovalev committed
677
    if (thissad < bestsad) {\
678
      if (use_mvcost) \
679
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\
Dmitry Kovalev's avatar
Dmitry Kovalev committed
680
      if (thissad < bestsad) {\
John Koleszar's avatar
John Koleszar committed
681
682
683
        bestsad = thissad;\
        best_site = i;\
      }\
Yunqing Wang's avatar
Yunqing Wang committed
684
    }\
John Koleszar's avatar
John Koleszar committed
685
686
  }

687
688
689
690
#define MAX_PATTERN_SCALES         11
#define MAX_PATTERN_CANDIDATES      8  // max number of canddiates per scale
#define PATTERN_CANDIDATES_REF      3  // number of refinement candidates

691
// Calculate and return a sad+mvcost list around an integer best pel.
692
693
694
695
696
697
static INLINE void calc_int_cost_list(const MACROBLOCK *x,
                                      const MV *ref_mv,
                                      int sadpb,
                                      const vp9_variance_fn_ptr_t *fn_ptr,
                                      const MV *best_mv,
                                      int *cost_list) {
698
699
700
701
702
703
704
705
  static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  const MV fcenter_mv = {ref_mv->row >> 3, ref_mv->col >> 3};
  int br = best_mv->row;
  int bc = best_mv->col;
  MV this_mv;
  int i;
706
  unsigned int sse;
707
708
709

  this_mv.row = br;
  this_mv.col = bc;
710
711
712
  cost_list[0] = fn_ptr->vf(what->buf, what->stride,
                            get_buf_from_mv(in_what, &this_mv),
                            in_what->stride, &sse) +
713
714
715
716
717
      mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  if (check_bounds(x, br, bc, 1)) {
    for (i = 0; i < 4; i++) {
      const MV this_mv = {br + neighbors[i].row,
        bc + neighbors[i].col};
718
719
720
721
722
723
      cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
                                    get_buf_from_mv(in_what, &this_mv),
                                    in_what->stride, &sse) +
          // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
          mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
                      x->errorperbit);
724
725
726
727
728
729
730
731
    }
  } else {
    for (i = 0; i < 4; i++) {
      const MV this_mv = {br + neighbors[i].row,
        bc + neighbors[i].col};
      if (!is_mv_in(x, &this_mv))
        cost_list[i + 1] = INT_MAX;
      else
732
733
734
735
736
737
        cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
                                      get_buf_from_mv(in_what, &this_mv),
                                      in_what->stride, &sse) +
            // mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
            mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
                        x->errorperbit);
738
739
740
741
    }
  }
}

742
743
744
745
// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
746
//
747
static int vp9_pattern_search(const MACROBLOCK *x,
748
                              MV *ref_mv,
749
750
                              int search_param,
                              int sad_per_bit,
751
                              int do_init_search,
752
                              int *cost_list,
753
754
                              const vp9_variance_fn_ptr_t *vfp,
                              int use_mvcost,
755
756
                              const MV *center_mv,
                              MV *best_mv,
757
758
759
                              const int num_candidates[MAX_PATTERN_SCALES],
                              const MV candidates[MAX_PATTERN_SCALES]
                                                 [MAX_PATTERN_CANDIDATES]) {
760
  const MACROBLOCKD *const xd = &x->e_mbd;
761
762
763
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
764
  int i, s, t;
765
766
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
John Koleszar's avatar
John Koleszar committed
767
  int br, bc;
768
769
  int bestsad = INT_MAX;
  int thissad;
John Koleszar's avatar
John Koleszar committed
770
  int k = -1;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
771
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
772
  int best_init_s = search_param_to_steps[search_param];
John Koleszar's avatar
John Koleszar committed
773
  // adjust ref_mv to make sure it is within MV range
774
775
776
  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  br = ref_mv->row;
  bc = ref_mv->col;
John Koleszar's avatar
John Koleszar committed
777
778

  // Work out the start point for the search
779
  bestsad = vfp->sdf(what->buf, what->stride,
780
781
                     get_buf_from_mv(in_what, ref_mv), in_what->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
John Koleszar's avatar
John Koleszar committed
782

783
784
785
786
787
788
789
  // Search all possible scales upto the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
790
      int best_site = -1;
791
      if (check_bounds(x, br, bc, 1 << t)) {
792
        for (i = 0; i < num_candidates[t]; i++) {
793
794
795
796
          const MV this_mv = {br + candidates[t][i].row,
                              bc + candidates[t][i].col};
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
797
                             in_what->stride);
798
799
800
801
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
802
803
          const MV this_mv = {br + candidates[t][i].row,
                              bc + candidates[t][i].col};
Dmitry Kovalev's avatar
Dmitry Kovalev committed
804
          if (!is_mv_in(x, &this_mv))
805
            continue;
806
807
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
808
                             in_what->stride);
809
810
811
812
813
814
815
816
817
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
John Koleszar's avatar
John Koleszar committed
818
    }
819
820
821
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
John Koleszar's avatar
John Koleszar committed
822
823
824
    }
  }

825
826
827
  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
828
    int best_site = -1;
829
    s = best_init_s;
830

831
832
833
    do {
      // No need to search all 6 points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
834
        if (check_bounds(x, br, bc, 1 << s)) {
835
          for (i = 0; i < num_candidates[s]; i++) {
836
837
838
839
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
840
                               in_what->stride);
841
842
843
844
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
845
846
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
Dmitry Kovalev's avatar
Dmitry Kovalev committed
847
            if (!is_mv_in(x, &this_mv))
848
              continue;
849
850
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
851
                               in_what->stride);
852
853
854
855
856
857
858
859
860
861
862
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
John Koleszar's avatar
John Koleszar committed
863
      }
864

865
866
867
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
868
869
870
871
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

872
        if (check_bounds(x, br, bc, 1 << s)) {
873
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
874
875
876
877
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
878
                               in_what->stride);
879
880
881
882
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
883
884
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
Dmitry Kovalev's avatar
Dmitry Kovalev committed
885
            if (!is_mv_in(x, &this_mv))
886
              continue;
887
888
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
889
                               in_what->stride);
890
891
892
893
894
895
896
897
898
899
900
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    } while (s--);
John Koleszar's avatar
John Koleszar committed
901
  }
902

903
  // Returns the one-away integer pel sad values around the best as follows:
904
905
906
907
908
909
910
911
  // cost_list[0]: cost at the best integer pel
  // cost_list[1]: cost at delta {0, -1} (left)   from the best integer pel
  // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: cost at delta { 0, 1} (right)  from the best integer pel
  // cost_list[4]: cost at delta {-1, 0} (top)    from the best integer pel
  if (cost_list) {
    const MV best_mv = { br, bc };
    calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
912
913
914
915
916
917
918
  }
  best_mv->row = br;
  best_mv->col = bc;
  return bestsad;
}

// A specialized function where the smallest scale search candidates
919
// are 4 1-away neighbors, and cost_list is non-null
920
921
922
923
924
925
926
// TODO(debargha): Merge this function with the one above. Also remove
// use_mvcost option since it is always 1, to save unnecessary branches.
static int vp9_pattern_search_sad(const MACROBLOCK *x,
                                  MV *ref_mv,
                                  int search_param,
                                  int sad_per_bit,
                                  int do_init_search,
927
                                  int *cost_list,
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
                                  const vp9_variance_fn_ptr_t *vfp,
                                  int use_mvcost,
                                  const MV *center_mv,
                                  MV *best_mv,
                                  const int num_candidates[MAX_PATTERN_SCALES],
                                  const MV candidates[MAX_PATTERN_SCALES]
                                                     [MAX_PATTERN_CANDIDATES]) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  int br, bc;
  int bestsad = INT_MAX;
  int thissad;
  int k = -1;
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  br = ref_mv->row;
  bc = ref_mv->col;
952
953
  if (cost_list != NULL) {
    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
        INT_MAX;
  }

  // Work out the start point for the search
  bestsad = vfp->sdf(what->buf, what->stride,
                     get_buf_from_mv(in_what, ref_mv), in_what->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);

  // Search all possible scales upto the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      int best_site = -1;
      if (check_bounds(x, br, bc, 1 << t)) {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = {br + candidates[t][i].row,
                              bc + candidates[t][i].col};
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
                             in_what->stride);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = {br + candidates[t][i].row,
                              bc + candidates[t][i].col};
          if (!is_mv_in(x, &this_mv))
            continue;
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
                             in_what->stride);
          CHECK_BETTER
        }
      }
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
    }
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
    }
  }

  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
1007
    int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
    int best_site = -1;
    s = best_init_s;

    for (; s >= do_sad; s--) {
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            if (!is_mv_in(x, &this_mv))
              continue;
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }

      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            if (!is_mv_in(x, &this_mv))
              continue;
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    }

1081
    // Note: If we enter the if below, then cost_list must be non-NULL.
1082
    if (s == 0) {
1083
      cost_list[0] = bestsad;
1084
1085
1086
1087
1088
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
1089
            cost_list[i + 1] =
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            if (!is_mv_in(x, &this_mv))
              continue;
1101
            cost_list[i + 1] =
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }
      while (best_site != -1) {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1121
1122
1123
        cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
        cost_list[((k + 2) % 4) + 1] = cost_list[0];
        cost_list[0] = bestsad;
1124
1125
1126
1127
1128

        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
1129
            cost_list[next_chkpts_indices[i] + 1] =
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            if (!is_mv_in(x, &this_mv)) {
1140
              cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
1141
1142
              continue;
            }
1143
            cost_list[next_chkpts_indices[i] + 1] =
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      }
    }
  }

  // Returns the one-away integer pel sad values around the best as follows:
1161
1162
1163
1164
1165
1166
  // cost_list[0]: sad at the best integer pel
  // cost_list[1]: sad at delta {0, -1} (left)   from the best integer pel
  // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
  // cost_list[3]: sad at delta { 0, 1} (right)  from the best integer pel
  // cost_list[4]: sad at delta {-1, 0} (top)    from the best integer pel
  if (cost_list) {
1167
    static const MV neighbors[4] = {{0, -1}, {1, 0}, {0, 1}, {-1, 0}};
1168
1169
    if (cost_list[0] == INT_MAX) {
      cost_list[0] = bestsad;
1170
1171
      if (check_bounds(x, br, bc, 1)) {
        for (i = 0; i < 4; i++) {
1172
1173
1174
          const MV this_mv = { br + neighbors[i].row,
                               bc + neighbors[i].col };
          cost_list[i + 1] = vfp->sdf(what->buf, what->stride,
1175
1176
                                     get_buf_from_mv(in_what, &this_mv),
                                     in_what->stride);
1177
1178
1179
1180
1181
1182
        }
      } else {
        for (i = 0; i < 4; i++) {
          const MV this_mv = {br + neighbors[i].row,
            bc + neighbors[i].col};
          if (!is_mv_in(x, &this_mv))
1183
            cost_list[i + 1] = INT_MAX;
1184
          else
1185
            cost_list[i + 1] = vfp->sdf(what->buf, what->stride,
1186
1187
1188
1189
1190
1191
1192
1193
1194
                                       get_buf_from_mv(in_what, &this_mv),
                                       in_what->stride);
        }
      }
    } else {
      if (use_mvcost) {
        for (i = 0; i < 4; i++) {
          const MV this_mv = {br + neighbors[i].row,
            bc + neighbors[i].col};
1195
1196
          if (cost_list[i + 1] != INT_MAX) {
            cost_list[i + 1] +=
1197
1198
1199
                mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
          }
        }
1200
      }
John Koleszar's avatar
John Koleszar committed
1201
    }
John Koleszar's avatar
John Koleszar committed
1202
  }
1203
1204
  best_mv->row = br;
  best_mv->col = bc;
Deb Mukherjee's avatar
Deb Mukherjee committed
1205
1206
  return bestsad;
}
Dmitry Kovalev's avatar
Dmitry Kovalev committed
1207

Deb Mukherjee's avatar
Deb Mukherjee committed
1208
int vp9_get_mvpred_var(const MACROBLOCK *x,
1209
                       const MV *best_mv, const MV *center_mv,
Deb Mukherjee's avatar
Deb Mukherjee committed
1210
1211
1212
                       const vp9_variance_fn_ptr_t *vfp,
                       int use_mvcost) {
  const MACROBLOCKD *const xd = &x->e_mbd;
1213
1214
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1215
  const MV mv = {best_mv->row * 8, best_mv->col * 8};
1216
1217
1218
1219
  unsigned int unused;

  return vfp->vf(what->buf, what->stride,
                 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
1220
      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
Deb Mukherjee's avatar
Deb Mukherjee committed
1221
                                 x->mvcost, x->errorperbit) : 0);
1222
1223
}

Deb Mukherjee's avatar
Deb Mukherjee committed
1224
int vp9_get_mvpred_av_var(const MACROBLOCK *x,
1225
                          const MV *best_mv, const MV *center_mv,
Deb Mukherjee's avatar
Deb Mukherjee committed
1226
1227
1228
1229
                          const uint8_t *second_pred,
                          const vp9_variance_fn_ptr_t *vfp,
                          int use_mvcost) {
  const MACROBLOCKD *const xd = &x->e_mbd;
1230
1231
1232
1233
1234
1235
1236
1237
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const MV mv = {best_mv->row * 8, best_mv->col * 8};
  unsigned int unused;

  return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
                   what->buf, what->stride, &unused, second_pred) +
      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
Deb Mukherjee's avatar
Deb Mukherjee committed
1238
1239
                                 x->mvcost, x->errorperbit) : 0);
}
1240

Dmitry Kovalev's avatar
Dmitry Kovalev committed
1241
int vp9_hex_search(const MACROBLOCK *x,
1242
                   MV *ref_mv,
1243
1244
1245
                   int search_param,
                   int sad_per_bit,
                   int do_init_search,
1246
                   int *cost_list,
1247
1248
                   const vp9_variance_fn_ptr_t *vfp,
                   int use_mvcost,
1249
                   const MV *center_mv, MV *best_mv) {
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
  // First scale has 8-closest points, the rest have 6 points in hex shape
  // at increasing scales
  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
      { -1024, 0}},
  };
Dmitry Kovalev's avatar
Dmitry Kovalev committed
1270
  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
1271
                            do_init_search, cost_list, vfp, use_mvcost,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
1272
1273
                            center_mv, best_mv,
                            hex_num_candidates, hex_candidates);
John Koleszar's avatar
John Koleszar committed
1274
}
1275

Dmitry Kovalev's avatar
Dmitry Kovalev committed
1276
int vp9_bigdia_search(const MACROBLOCK *x,
1277
                      MV *ref_mv,
1278
1279
1280
                      int search_param,
                      int sad_per_bit,
                      int do_init_search,
1281
                      int *cost_list,
1282
1283
                      const vp9_variance_fn_ptr_t *vfp,
                      int use_mvcost,
1284
1285
                      const MV *center_mv,
                      MV *best_mv) {
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
  // First scale has 4-closest points, the rest have 8 points in diamond
  // shape at increasing scales
  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  // Note that the largest candidate step at each scale is 2^scale
  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
                                   [MAX_PATTERN_CANDIDATES] = {
    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
      {-16, 16}, {-32, 0}},
    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
      {-32, 32}, {-64, 0}},
    {{-64, -64}, {0, -128}, {64, -64}, {128, 0<