/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
#include <stdlib.h>
Johann's avatar
Johann committed
12

Yaowu Xu's avatar
Yaowu Xu committed
13
14
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
Johann's avatar
Johann committed
15

16
#include "aom_ports/mem.h"
Yaowu Xu's avatar
Yaowu Xu committed
17
#include "aom/aom_integer.h"
Johann's avatar
Johann committed
18

19
#include "aom_dsp/variance.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_dsp/aom_filter.h"
Johann's avatar
Johann committed
21

Yaowu Xu's avatar
Yaowu Xu committed
22
// Returns the sum of squared differences between two 4x4 blocks of 8-bit
// samples.
//
// a, b:               top-left sample of each block.
// a_stride, b_stride: distance, in samples, from one row to the next.
uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  // 16 samples * 255^2 is far below UINT32_MAX, so the accumulator can share
  // the return type without any risk of wrapping.
  uint32_t distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      const int diff = a[c] - b[c];
      distortion += (uint32_t)(diff * diff);
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

Yaowu Xu's avatar
Yaowu Xu committed
40
// Returns the sum of squares of the 256 consecutive int16_t values starting
// at a. The total is accumulated in unsigned arithmetic, so it wraps modulo
// 2^32 rather than overflowing.
uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    // The product is formed in int: |a[i]| <= 32768, so it is at most 2^30.
    sum += a[i] * a[i];
  }

  return sum;
}

Yaowu Xu's avatar
Yaowu Xu committed
50
// 16x16 sub-pixel variance with a fixed (xoffset, yoffset) of (4, 0).
// Forwards to aom_sub_pixel_variance16x16_c; offset 4 presumably selects the
// horizontal half-sample filter (suggested by the function name -- confirm
// against the bilinear_filters_2t table).
//
// Writes the sum of squared errors to *sse and returns the variance.
uint32_t aom_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}

Yaowu Xu's avatar
Yaowu Xu committed
56
// 16x16 sub-pixel variance with a fixed (xoffset, yoffset) of (0, 4).
// Forwards to aom_sub_pixel_variance16x16_c; offset 4 presumably selects the
// vertical half-sample filter (suggested by the function name -- confirm
// against the bilinear_filters_2t table).
//
// Writes the sum of squared errors to *sse and returns the variance.
uint32_t aom_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}

Yaowu Xu's avatar
Yaowu Xu committed
62
// 16x16 sub-pixel variance with a fixed (xoffset, yoffset) of (4, 4).
// Forwards to aom_sub_pixel_variance16x16_c; offset 4 in both directions
// presumably selects the diagonal half-sample position (suggested by the
// function name -- confirm against the bilinear_filters_2t table).
//
// Writes the sum of squared errors to *sse and returns the variance.
uint32_t aom_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}

clang-format's avatar
clang-format committed
68
69
// Computes the sum of differences and the sum of squared differences between
// two w x h blocks of 8-bit samples.
//
// a, b:               top-left sample of each block.
// a_stride, b_stride: distance, in samples, from one row to the next.
// w, h:               block width and height in samples.
// sse:                receives the sum of (a - b)^2 over the block.
// sum:                receives the sum of (a - b) over the block.
//
// Both outputs are written exactly once; they are 0 when w or h is not
// positive.
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  // Accumulate in locals so the inner loop does not store through the output
  // pointers on every sample.
  uint32_t sq_total = 0;
  int diff_total = 0;
  int i, j;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      diff_total += diff;
      sq_total += (uint32_t)(diff * diff);
    }

    a += a_stride;
    b += b_stride;
  }

  *sum = diff_total;
  *sse = sq_total;
}

87
88
89
90
91
92
93
94
// Returns the sum of squared differences between two w x h blocks of 8-bit
// samples, for arbitrary block dimensions. The sum of differences that
// variance() also produces is discarded.
uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, int w, int h) {
  int unused_sum;
  uint32_t squared_error;

  variance(a, a_stride, b, b_stride, w, h, &squared_error, &unused_sum);

  return squared_error;
}

Johann's avatar
Johann committed
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// the first-pass of 2-D separable filter.
//
// Produces int16_t output to retain precision for the next pass. Two filter
// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
// It defines the offset required to move from one input to the next.
static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
113
114
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
Johann's avatar
Johann committed
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138

      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// the second-pass of 2-D separable filter.
//
// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one input
// to the next. Output is 8-bit.
static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
                                               const uint8_t *filter) {
clang-format's avatar
clang-format committed
139
  unsigned int i, j;
Johann's avatar
Johann committed
140
141
142

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
143
144
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
Johann's avatar
Johann committed
145
146
147
148
149
150
151
152
      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

clang-format's avatar
clang-format committed
153
// Defines aom_variance<W>x<H>_c(): stores the sum of squared differences of
// the two W x H blocks in *sse and returns sse - sum^2 / (W * H), where sum is
// the sum of differences. The sum^2 product is formed in 64 bits.
#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
  }
Johann's avatar
Johann committed
161

clang-format's avatar
clang-format committed
162
// Defines aom_sub_pixel_variance<W>x<H>_c(): filters block a with the 2-tap
// filters bilinear_filters_2t[xoffset] (first pass, pixel_step 1, producing
// H + 1 rows) and bilinear_filters_2t[yoffset] (second pass, pixel_step W),
// then returns the W x H variance of the filtered block against b.
// fdata3 holds the (H + 1) x W intermediate; temp2 the final W x H block.
#define SUBPIX_VAR(W, H)                                                \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                         \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);       \
  }
Johann's avatar
Johann committed
176

clang-format's avatar
clang-format committed
177
// Defines aom_sub_pixel_avg_variance<W>x<H>_c(): same two-pass bilinear
// filtering of block a as SUBPIX_VAR, after which the filtered block (temp2)
// is combined with second_pred by aom_comp_avg_pred() into temp3, and the
// W x H variance of temp3 against b is returned.
// NOTE(review): aom_comp_avg_pred is declared outside this file; presumably it
// behaves like aom_comp_avg_pred_c (rounded average) -- confirm.
#define SUBPIX_AVG_VAR(W, H)                                            \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                     \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                         \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);              \
                                                                        \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);       \
  }
Johann's avatar
Johann committed
195

Johann's avatar
Johann committed
196
197
198
199
/* Identical to the variance call except it takes an additional parameter, sum,
 * and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h). Defines aom_get<W>x<H>var_c(), which has no return
 * value: both *sse and *sum are outputs.
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }
Johann's avatar
Johann committed
206
207
208
209
210

/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / (w * h) and returns sse in addition to modifying the passed in
 * variable. Defines aom_mse<W>x<H>_c(); the sum of differences is computed
 * but discarded.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }
Johann's avatar
Johann committed
219

Johann's avatar
Johann committed
220
221
/* All three forms of the variance are available in the same sizes:
 * VARIANCES(W, H) expands to the plain, sub-pixel and sub-pixel-average
 * variance functions for a W x H block.
 */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)
Johann's avatar
Johann committed
225

Yaowu Xu's avatar
Yaowu Xu committed
226
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
227
228
229
VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
Yaowu Xu's avatar
Yaowu Xu committed
230
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
Johann's avatar
Johann committed
231
232
233
234
235
236
237
238
239
240
241
242
243
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)
244
245
VARIANCES(4, 2)
VARIANCES(2, 4)
Jingning Han's avatar
Jingning Han committed
246
VARIANCES(2, 2)
Johann's avatar
Johann committed
247
248
249
250
251
252
253
254
255

GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

Yaowu Xu's avatar
Yaowu Xu committed
256
// Writes the rounded average of two predictions into comp_pred.
//
// comp_pred:  output block, written densely (row pitch equals width).
// pred:       first prediction, also read densely (row pitch equals width).
// width/height: block dimensions in samples.
// ref:        second prediction, read with a row pitch of ref_stride.
//
// Each output sample is ROUND_POWER_OF_TWO(pred + ref, 1).
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

271
// Get pred block from up-sampled reference.
//
// Copies every 8th sample of every 8th row of ref into comp_pred.
//
// comp_pred:    output block, written densely (row pitch equals width).
// width/height: output block dimensions in samples.
// ref:          up-sampled reference; sample (i, j) of the output is read
//               from ref[i * 8 * ref_stride + j * 8].
// ref_stride:   row pitch of ref in samples.
void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height,
                          const uint8_t *ref, int ref_stride) {
  int i, j, k;
  // Multiply rather than shift: left-shifting a negative stride is undefined
  // behaviour in C, whereas the multiplication is well defined.
  const int stride = ref_stride * 8;

  for (i = 0; i < height; i++) {
    for (j = 0, k = 0; j < width; j++, k += 8) {
      comp_pred[j] = ref[k];
    }
    comp_pred += width;
    ref += stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
286
// Averages a prediction with a block sampled from an up-sampled reference.
//
// comp_pred:    output block, written densely (row pitch equals width).
// pred:         prediction block, read densely (row pitch equals width).
// width/height: block dimensions in samples.
// ref:          up-sampled reference; sample (i, j) is read from
//               ref[i * 8 * ref_stride + j * 8].
// ref_stride:   row pitch of ref in samples.
//
// Each output sample is ROUND_POWER_OF_TWO(ref + pred, 1).
void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                   int width, int height, const uint8_t *ref,
                                   int ref_stride) {
  int i, j;
  // Multiply rather than shift: left-shifting a negative stride is undefined
  // behaviour in C, whereas the multiplication is well defined.
  const int stride = ref_stride * 8;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      const int tmp = ref[(j << 3)] + pred[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
  }
}

303
#if CONFIG_HIGHBITDEPTH
clang-format's avatar
clang-format committed
304
305
306
// Computes the sum of differences and the sum of squared differences between
// two w x h blocks of 16-bit samples, using 64-bit accumulators.
//
// a8, b8:             block pointers in the "byte pointer" form; they are
//                     turned into uint16_t pointers with CONVERT_TO_SHORTPTR.
// a_stride, b_stride: row pitch of each block, in 16-bit samples.
// w, h:               block dimensions in samples.
// sse:                receives the sum of (a - b)^2.
// sum:                receives the sum of (a - b).
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      // Square in 64 bits: |diff| can reach 65535 for full-range uint16_t
      // input, and 65535^2 does not fit in a 32-bit int.
      *sse += (uint64_t)((int64_t)diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
}

325
326
327
328
329
330
331
332
// Returns the 64-bit sum of squared differences between two w x h blocks of
// high-bitdepth samples, for arbitrary block dimensions. The sum of
// differences that highbd_variance64() also produces is discarded.
uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
                                 const uint8_t *b, int b_stride, int w, int h) {
  int64_t unused_sum;
  uint64_t squared_error;

  highbd_variance64(a, a_stride, b, b_stride, w, h, &squared_error,
                    &unused_sum);

  return squared_error;
}

clang-format's avatar
clang-format committed
333
334
335
// High-bitdepth sse/sum for 8-bit content: runs highbd_variance64() and
// narrows its 64-bit results to the 32-bit outputs without any scaling.
//
// sse: receives the sum of squared differences, truncated to uint32_t.
// sum: receives the sum of differences, truncated to int.
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

clang-format's avatar
clang-format committed
343
344
345
// High-bitdepth sse/sum for 10-bit content: runs highbd_variance64() and
// scales the results down with rounding before narrowing them.
//
// sse: receives ROUND_POWER_OF_TWO(sse, 4).
// sum: receives ROUND_POWER_OF_TWO(sum, 2).
//
// NOTE(review): the shifts (2 bits for sum, 4 for its square) presumably bring
// 10-bit statistics to the 8-bit scale -- confirm with callers.
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

clang-format's avatar
clang-format committed
353
354
355
// High-bitdepth sse/sum for 12-bit content: runs highbd_variance64() and
// scales the results down with rounding before narrowing them.
//
// sse: receives ROUND_POWER_OF_TWO(sse, 8).
// sum: receives ROUND_POWER_OF_TWO(sum, 4).
//
// NOTE(review): the shifts (4 bits for sum, 8 for its square) presumably bring
// 12-bit statistics to the 8-bit scale -- confirm with callers.
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}

clang-format's avatar
clang-format committed
363
// Defines aom_highbd_{8,10,12}_variance<W>x<H>_c(): each stores the (scaled)
// sum of squared differences in *sse and returns sse - sum^2 / (W * H).
// The 10- and 12-bit variants work on rounded sse/sum values, so the
// difference can come out negative; they clamp the result at 0. The 8-bit
// variant narrows the mean-square term explicitly, matching VAR().
#define HIGHBD_VAR(W, H)                                                       \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
                                              const uint8_t *b, int b_stride,  \
                                              uint32_t *sse) {                 \
    int sum;                                                                   \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);              \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                  \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }
Johann's avatar
Johann committed
391

clang-format's avatar
clang-format committed
392
// Defines aom_highbd_{8,10,12}_get<S>x<S>var_c() for a square S x S block:
// each forwards to the matching highbd_*_variance() helper and hands back
// both *sse and *sum through the output pointers (no return value).
#define HIGHBD_GET_VAR(S)                                                    \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }
Johann's avatar
Johann committed
410

clang-format's avatar
clang-format committed
411
// Defines aom_highbd_{8,10,12}_mse<W>x<H>_c(): each stores the (scaled) sum of
// squared differences in *sse and also returns it. The sum of differences is
// computed by the helper but discarded.
#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }
Johann's avatar
Johann committed
435

Yaowu Xu's avatar
Yaowu Xu committed
436
void aom_highbd_var_filter_block2d_bil_first_pass(
clang-format's avatar
clang-format committed
437
438
439
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
Johann's avatar
Johann committed
440
441
442
443
444
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
445
446
447
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
Johann's avatar
Johann committed
448
449
450
451
452
453
454
455
456
457

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
458
void aom_highbd_var_filter_block2d_bil_second_pass(
clang-format's avatar
clang-format committed
459
460
461
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
Johann's avatar
Johann committed
462
    const uint8_t *filter) {
clang-format's avatar
clang-format committed
463
  unsigned int i, j;
Johann's avatar
Johann committed
464
465
466

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
467
468
469
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
Johann's avatar
Johann committed
470
471
472
473
474
475
476
477
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

clang-format's avatar
clang-format committed
478
// Defines aom_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c(): each filters src
// with bilinear_filters_2t[xoffset] (first pass, pixel_step 1, H + 1 rows) and
// bilinear_filters_2t[yoffset] (second pass, pixel_step W), then returns the
// matching-bitdepth W x H variance of the filtered block against dst.
// The 16-bit temp2 buffer is passed on through CONVERT_TO_BYTEPTR because the
// variance functions take high-bitdepth blocks as uint8_t pointers.
#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }
Johann's avatar
Johann committed
523

clang-format's avatar
clang-format committed
524
/* Generates the 8-, 10- and 12-bit C versions of the high-bitdepth sub-pixel
 * "average" variance for a W x H block.
 *
 * Each generated function:
 *   1. filters src with the two-pass bilinear filter, using
 *      bilinear_filters_2t[xoffset] in the first pass (H + 1 rows of W
 *      samples into fdata3) and bilinear_filters_2t[yoffset] in the second
 *      pass (H rows of W samples into temp2);
 *   2. averages the filtered block with second_pred through
 *      aom_highbd_comp_avg_pred_c(), writing into temp3;
 *   3. returns aom_highbd_{8,10,12}_variance<W>x<H>_c() of temp3 against dst,
 *      which also stores the sum of squared errors in *sse.
 *
 * src, dst and second_pred are uint8_t pointers that are handed on to helpers
 * which treat them as CONVERT_TO_BYTEPTR-style aliases of 16-bit buffers. */
#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(temp2), W);                \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }
Johann's avatar
Johann committed
584
585
586

/* All three forms of the variance are available in the same sizes:
 * HIGHBD_VARIANCES(W, H) expands to the plain, sub-pixel and sub-pixel
 * average high-bitdepth variance generators for one W x H block size. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)
Johann's avatar
Johann committed
590

Yaowu Xu's avatar
Yaowu Xu committed
591
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
592
593
594
HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
Yaowu Xu's avatar
Yaowu Xu committed
595
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
Johann's avatar
Johann committed
596
597
598
599
600
601
602
603
604
605
606
607
608
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)
609
610
611
HIGHBD_VARIANCES(4, 2)
HIGHBD_VARIANCES(2, 4)
HIGHBD_VARIANCES(2, 2)
Johann's avatar
Johann committed
612

Johann's avatar
Johann committed
613
614
615
616
617
618
619
620
HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

Yaowu Xu's avatar
Yaowu Xu committed
621
// Writes the rounded average of two high-bitdepth blocks into comp_pred.
//
//   comp_pred  - output, width * height samples stored contiguously
//                (row pitch == width).
//   pred8      - first input, passed through CONVERT_TO_SHORTPTR(); read with
//                a row pitch of width.
//   ref8       - second input, passed through CONVERT_TO_SHORTPTR(); read with
//                a row pitch of ref_stride.
//
// Each output sample is ROUND_POWER_OF_TWO(pred + ref, 1).
void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  int i, j;
  const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
637

Yaowu Xu's avatar
Yaowu Xu committed
638
// Copies every 8th sample of every 8th row of a high-bitdepth reference into
// comp_pred.
//
//   comp_pred  - output, width * height samples stored contiguously.
//   ref8       - reference, passed through CONVERT_TO_SHORTPTR().
//   ref_stride - multiplied by 8 to obtain the step between sampled rows.
//
// Output sample (i, j) is ref[i * 8 * ref_stride + j * 8].
void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height,
                                 const uint8_t *ref8, int ref_stride) {
  int i, j;
  const int stride = ref_stride << 3;
  const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      comp_pred[j] = ref[(j << 3)];
    }
    comp_pred += width;
    ref += stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
653
// Averages a high-bitdepth prediction with every 8th sample of every 8th row
// of a reference, writing the rounded result into comp_pred.
//
//   comp_pred  - output, width * height samples stored contiguously.
//   pred8      - prediction, passed through CONVERT_TO_SHORTPTR(); read with
//                a row pitch of width.
//   ref8       - reference, passed through CONVERT_TO_SHORTPTR().
//   ref_stride - multiplied by 8 to obtain the step between sampled rows.
//
// Output sample (i, j) is
//   ROUND_POWER_OF_TWO(pred[i * width + j] + ref[i * 8 * ref_stride + j * 8], 1).
void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
                                          const uint8_t *pred8, int width,
                                          int height, const uint8_t *ref8,
                                          int ref_stride) {
  int i, j;
  const int stride = ref_stride << 3;
  const uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[(j << 3)];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
  }
}
672
#endif  // CONFIG_HIGHBITDEPTH
673

Yaowu Xu's avatar
Yaowu Xu committed
674
#if CONFIG_AV1 && CONFIG_EXT_INTER
clang-format's avatar
clang-format committed
675
676
677
// Accumulates the mask-weighted difference (a - b) * m over a w x h block.
//
//   a, b, m  - 8-bit source, reference and mask, each with its own row stride.
//   *sum     - receives |sum of weighted differences|, round-shifted right by
//              6 bits. The sign is dropped; MASK_VAR only uses sum * sum.
//   *sse     - receives the sum of squared weighted differences, round-shifted
//              right by 12 bits.
//
// NOTE(review): the 6/12-bit shifts suggest mask weights with 6 fractional
// bits (maximum 64) - confirm against the mask generators.
void masked_variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, const uint8_t *m, int m_stride, int w, int h,
                     unsigned int *sse, int *sum) {
  int i, j;

  int64_t sum64 = 0;
  uint64_t sse64 = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int diff = (a[j] - b[j]) * (m[j]);
      sum64 += diff;
      // Square in 64 bits: |diff| can reach 255 * 255, whose square does not
      // fit in a 32-bit int.
      sse64 += (uint64_t)((int64_t)diff * diff);
    }

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  sum64 = (sum64 >= 0) ? sum64 : -sum64;
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 6);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 12);
}

clang-format's avatar
clang-format committed
699
/* Generates aom_masked_variance<W>x<H>_c(): runs masked_variance() over a
 * W x H block, leaves the scaled sum of squared errors in *sse and returns
 * *sse - sum^2 / (W * H). */
#define MASK_VAR(W, H)                                                       \
  unsigned int aom_masked_variance##W##x##H##_c(                             \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));            \
  }
707

clang-format's avatar
clang-format committed
708
/* Generates aom_masked_sub_pixel_variance<W>x<H>_c(): filters src with the
 * two-pass bilinear filter (bilinear_filters_2t[xoffset] in the first pass,
 * bilinear_filters_2t[yoffset] in the second) into a W x H 8-bit temporary and
 * returns aom_masked_variance<W>x<H>_c() of that temporary against dst under
 * mask msk. */
#define MASK_SUBPIX_VAR(W, H)                                                 \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint8_t temp2[H * W];                                                     \
                                                                              \
    var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W,   \
                                      bilinear_filters_2t[xoffset]);          \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,             \
                                       bilinear_filters_2t[yoffset]);         \
                                                                              \
    return aom_masked_variance##W##x##H##_c(temp2, W, dst, dst_stride, msk,   \
                                            msk_stride, sse);                 \
  }
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774

/* Instantiate the 8-bit masked variance and masked sub-pixel variance
 * functions for each block size. The sizes with a 128-sample dimension are
 * only built when CONFIG_EXT_PARTITION is enabled. */
MASK_VAR(4, 4)
MASK_SUBPIX_VAR(4, 4)

MASK_VAR(4, 8)
MASK_SUBPIX_VAR(4, 8)

MASK_VAR(8, 4)
MASK_SUBPIX_VAR(8, 4)

MASK_VAR(8, 8)
MASK_SUBPIX_VAR(8, 8)

MASK_VAR(8, 16)
MASK_SUBPIX_VAR(8, 16)

MASK_VAR(16, 8)
MASK_SUBPIX_VAR(16, 8)

MASK_VAR(16, 16)
MASK_SUBPIX_VAR(16, 16)

MASK_VAR(16, 32)
MASK_SUBPIX_VAR(16, 32)

MASK_VAR(32, 16)
MASK_SUBPIX_VAR(32, 16)

MASK_VAR(32, 32)
MASK_SUBPIX_VAR(32, 32)

MASK_VAR(32, 64)
MASK_SUBPIX_VAR(32, 64)

MASK_VAR(64, 32)
MASK_SUBPIX_VAR(64, 32)

MASK_VAR(64, 64)
MASK_SUBPIX_VAR(64, 64)

#if CONFIG_EXT_PARTITION
MASK_VAR(64, 128)
MASK_SUBPIX_VAR(64, 128)

MASK_VAR(128, 64)
MASK_SUBPIX_VAR(128, 64)

MASK_VAR(128, 128)
MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

775
#if CONFIG_HIGHBITDEPTH
clang-format's avatar
clang-format committed
776
777
778
779
// High-bitdepth core of the masked variance: accumulates the mask-weighted
// difference (a - b) * m over a w x h block in 64-bit precision.
//
//   a8, b8  - source and reference, passed through CONVERT_TO_SHORTPTR().
//   m       - 8-bit mask with row stride m_stride.
//   *sum    - receives |sum of weighted differences|, round-shifted right by
//             6 bits (the sign is dropped).
//   *sse    - receives the sum of squared weighted differences, round-shifted
//             right by 12 bits.
void highbd_masked_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, const uint8_t *m,
                              int m_stride, int w, int h, uint64_t *sse,
                              int64_t *sum) {
  int i, j;
  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  int64_t sum64 = 0;
  uint64_t sse64 = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int diff = (a[j] - b[j]) * (m[j]);
      sum64 += (int64_t)diff;
      // The square of a weighted high-bitdepth difference can exceed 32 bits.
      sse64 += (uint64_t)((int64_t)diff * diff);
    }

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  sum64 = (sum64 >= 0) ? sum64 : -sum64;
  *sum = ROUND_POWER_OF_TWO(sum64, 6);
  *sse = ROUND_POWER_OF_TWO(sse64, 12);
}

clang-format's avatar
clang-format committed
803
804
805
// Masked sum / SSE for the plain high-bitdepth path: runs
// highbd_masked_variance64() and narrows its 64-bit results to int /
// unsigned int without any further scaling.
void highbd_masked_variance(const uint8_t *a8, int a_stride, const uint8_t *b8,
                            int b_stride, const uint8_t *m, int m_stride, int w,
                            int h, unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &sse64, &sum64);
  *sum = (int)sum64;
  *sse = (unsigned int)sse64;
}

clang-format's avatar
clang-format committed
814
815
816
// Masked sum / SSE for the 10-bit path: runs highbd_masked_variance64() and
// round-shifts the sum right by 2 more bits and the SSE by 4 more bits before
// narrowing.
// NOTE(review): presumably this rescales 10-bit results to the 8-bit range -
// confirm against the non-masked 10-bit variance.
void highbd_10_masked_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               const uint8_t *m, int m_stride, int w, int h,
                               unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}

clang-format's avatar
clang-format committed
826
827
828
// Masked sum / SSE for the 12-bit path: runs highbd_masked_variance64() and
// round-shifts the sum right by 4 more bits and the SSE by 8 more bits before
// narrowing.
// NOTE(review): presumably this rescales 12-bit results to the 8-bit range -
// confirm against the non-masked 12-bit variance.
void highbd_12_masked_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               const uint8_t *m, int m_stride, int w, int h,
                               unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}

clang-format's avatar
clang-format committed
838
/* Generates the plain, 10-bit and 12-bit high-bitdepth masked variance
 * functions for a W x H block. Each one calls the matching
 * highbd_*_masked_variance() helper, leaves the scaled sum of squared errors
 * in *sse and returns *sse - sum^2 / (W * H). The mean term is cast to
 * unsigned int explicitly, as MASK_VAR does; the result is the same modulo
 * 2^32 as the previous implicit conversion on return. */
#define HIGHBD_MASK_VAR(W, H)                                                \
  unsigned int aom_highbd_masked_variance##W##x##H##_c(                      \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
                           &sum);                                            \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));            \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_10_masked_variance##W##x##H##_c(                   \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,   \
                              sse, &sum);                                    \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));            \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_12_masked_variance##W##x##H##_c(                   \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,   \
                              sse, &sum);                                    \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H));            \
  }
865

clang-format's avatar
clang-format committed
866
/* Generates the plain, 10-bit and 12-bit high-bitdepth masked sub-pixel
 * variance functions for a W x H block. Each one filters src with the
 * two-pass high-bitdepth bilinear filter (bilinear_filters_2t[xoffset] in the
 * first pass, bilinear_filters_2t[yoffset] in the second) into a W x H 16-bit
 * temporary and returns the matching aom_highbd_*_masked_variance<W>x<H>_c()
 * of that temporary against dst under mask msk. */
#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                          \
  unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint16_t temp2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                              \
    return aom_highbd_masked_variance##W##x##H##_c(                           \
        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint16_t temp2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                              \
    return aom_highbd_10_masked_variance##W##x##H##_c(                        \
        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint16_t temp2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                              \
    return aom_highbd_12_masked_variance##W##x##H##_c(                        \
        CONVERT_TO_BYTEPTR(temp2), W, dst, dst_stride, msk, msk_stride, sse); \
  }
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963

/* Instantiate the high-bitdepth masked variance and masked sub-pixel variance
 * functions for each block size. The sizes with a 128-sample dimension are
 * only built when CONFIG_EXT_PARTITION is enabled. */
HIGHBD_MASK_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 4)

HIGHBD_MASK_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(4, 8)

HIGHBD_MASK_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 4)

HIGHBD_MASK_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 8)

HIGHBD_MASK_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(8, 16)

HIGHBD_MASK_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 8)

HIGHBD_MASK_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 16)

HIGHBD_MASK_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(16, 32)

HIGHBD_MASK_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 16)

HIGHBD_MASK_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 32)

HIGHBD_MASK_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(32, 64)

HIGHBD_MASK_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 32)

HIGHBD_MASK_VAR(64, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 64)

#if CONFIG_EXT_PARTITION
HIGHBD_MASK_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(64, 128)

HIGHBD_MASK_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 64)

HIGHBD_MASK_VAR(128, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
964
#endif  // CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
965
#endif  // CONFIG_AV1 && CONFIG_EXT_INTER
966

Yue Chen's avatar
Yue Chen committed
967
#if CONFIG_AV1 && CONFIG_MOTION_VAR
clang-format's avatar
clang-format committed
968
// Accumulates the OBMC residual over a w x h block.
//
//   pre        - 8-bit prediction with row stride pre_stride.
//   wsrc, mask - 32-bit weighted source and mask, both stored with a row
//                pitch of w.
//   *sum       - receives the sum of the per-sample differences.
//   *sse       - receives the sum of the squared per-sample differences.
//
// The per-sample difference is
//   ROUND_POWER_OF_TWO_SIGNED(wsrc - pre * mask, 12).
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  int i, j;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      *sse += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

989
990
991
992
993
994
995
/* Generates aom_obmc_variance<W>x<H>_c(): runs obmc_variance() over a W x H
 * block, leaves the sum of squared errors in *sse and returns
 * *sse - sum^2 / (W * H). */
#define OBMC_VAR(W, H)                                            \
  unsigned int aom_obmc_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,    \
      const int32_t *mask, unsigned int *sse) {                   \
    int sum;                                                      \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);  \
    return *sse - (unsigned int)(((int64_t)sum * sum) / (W * H)); \
  }
997

clang-format's avatar
clang-format committed
998
/* Generates aom_obmc_sub_pixel_variance<W>x<H>_c(): filters pre with the
 * two-pass bilinear filter (bilinear_filters_2t[xoffset] in the first pass,
 * bilinear_filters_2t[yoffset] in the second) into a W x H 8-bit temporary and
 * returns aom_obmc_variance<W>x<H>_c() of that temporary against wsrc/mask. */
#define OBMC_SUBPIX_VAR(W, H)                                               \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,         \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {        \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint8_t temp2[H * W];                                                   \
                                                                            \
    var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,           \
                                       bilinear_filters_2t[yoffset]);       \
                                                                            \
    return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse);       \
  }
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062

/* Instantiate the 8-bit OBMC variance and OBMC sub-pixel variance functions
 * for each block size. The sizes with a 128-sample dimension are only built
 * when CONFIG_EXT_PARTITION is enabled. */
OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

#if CONFIG_EXT_PARTITION
OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

1063
#if CONFIG_HIGHBITDEPTH
1064
1065
// High-bitdepth core of the OBMC variance: accumulates the OBMC residual over
// a w x h block in 64-bit precision.
//
//   pre8       - prediction, passed through CONVERT_TO_SHORTPTR(); row stride
//                pre_stride.
//   wsrc, mask - 32-bit weighted source and mask, both stored with a row
//                pitch of w.
//   *sum       - receives the sum of the per-sample differences.
//   *sse       - receives the sum of the squared per-sample differences.
//
// The per-sample difference is
//   ROUND_POWER_OF_TWO_SIGNED(wsrc - pre * mask, 12).
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  int i, j;
  const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      // Square in 64 bits so the product cannot overflow a 32-bit int.
      *sse += (uint64_t)((int64_t)diff * diff);
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

1087
1088
// OBMC sum / SSE for the plain high-bitdepth path: runs
// highbd_obmc_variance64() and narrows its 64-bit results to int /
// unsigned int without any further scaling.
static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)sum64;
  *sse = (unsigned int)sse64;
}

clang-format's avatar
clang-format committed
1098
// OBMC sum / SSE for the 10-bit path: runs highbd_obmc_variance64() and
// applies ROUND_POWER_OF_TWO with 2 bits to the sum and 4 bits to the SSE
// before narrowing.
// NOTE(review): the sum from highbd_obmc_variance64() is signed and is passed
// to the unsigned-style ROUND_POWER_OF_TWO here - confirm that the rounding of
// negative sums is as intended.
static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}

clang-format's avatar
clang-format committed
1109
// OBMC sum / SSE for the 12-bit path: runs highbd_obmc_variance64() and
// applies ROUND_POWER_OF_TWO with 4 bits to the sum and 8 bits to the SSE
// before narrowing.
// NOTE(review): the sum from highbd_obmc_variance64() is signed and is passed
// to the unsigned-style ROUND_POWER_OF_TWO here - confirm that the rounding of
// negative sums is as intended.
static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}

clang-format's avatar
clang-format committed
1120
/*
 * Generates the three C OBMC variance entry points for a W x H block:
 *   aom_highbd_obmc_variance<W>x<H>_c
 *   aom_highbd_10_obmc_variance<W>x<H>_c
 *   aom_highbd_12_obmc_variance<W>x<H>_c
 *
 * Each one obtains the SSE (stored through *sse) and the sum from the
 * matching highbd_*_obmc_variance() helper and returns
 *   *sse - sum * sum / (W * H).
 *
 * The 10- and 12-bit helpers round the SSE and the sum independently, so the
 * squared-mean term can end up larger than the rounded SSE. The difference
 * would then be negative and wrap to a huge unsigned value; those two
 * variants therefore compute the result in 64 bits and clamp it at zero.
 * The first variant applies no rounding and is left as a plain subtraction.
 */
#define HIGHBD_OBMC_VAR(W, H)                                              \
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);    \
    return *sse - (((int64_t)sum * sum) / (W * H));                        \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    /* Rounded sse/sum can make this negative; clamp instead of wrap. */   \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (unsigned int)var : 0;                             \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    /* Rounded sse/sum can make this negative; clamp instead of wrap. */   \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (unsigned int)var : 0;                             \
  }
1144

clang-format's avatar
clang-format committed
1145
/*
 * Generates the three C OBMC sub-pixel variance entry points for a W x H
 * block:
 *   aom_highbd_obmc_sub_pixel_variance<W>x<H>_c
 *   aom_highbd_10_obmc_sub_pixel_variance<W>x<H>_c
 *   aom_highbd_12_obmc_sub_pixel_variance<W>x<H>_c
 *
 * Each one runs the two bilinear filter passes over `pre` -- the first with
 * the bilinear_filters_2t[xoffset] kernel into an (H + 1) x W scratch buffer,
 * the second with the bilinear_filters_2t[yoffset] kernel into an H x W
 * buffer -- and then hands that buffer (stride W, wrapped with
 * CONVERT_TO_BYTEPTR) to the matching aom_highbd_*_obmc_variance<W>x<H>_c
 * function, whose return value is returned unchanged.
 *
 * xoffset and yoffset index bilinear_filters_2t directly; no range check is
 * performed here.
 */
#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                           \
  unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c(                \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t hpass[(H + 1) * W];                                               \
    uint16_t vpass[H * W];                                                     \
    const uint8_t *const pred8 = CONVERT_TO_BYTEPTR(vpass);                    \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, hpass, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);    \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        hpass, vpass, W, W, H, W, bilinear_filters_2t[yoffset]);               \
                                                                               \
    return aom_highbd_obmc_variance##W##x##H##_c(pred8, W, wsrc, mask, sse);   \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t hpass[(H + 1) * W];                                               \
    uint16_t vpass[H * W];                                                     \
    const uint8_t *const pred8 = CONVERT_TO_BYTEPTR(vpass);                    \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, hpass, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);    \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        hpass, vpass, W, W, H, W, bilinear_filters_2t[yoffset]);               \
                                                                               \
    return aom_highbd_10_obmc_variance##W##x##H##_c(pred8, W, wsrc, mask, sse); \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t hpass[(H + 1) * W];                                               \
    uint16_t vpass[H * W];                                                     \
    const uint8_t *const pred8 = CONVERT_TO_BYTEPTR(vpass);                    \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, hpass, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);    \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        hpass, vpass, W, W, H, W, bilinear_filters_2t[yoffset]);               \
                                                                               \
    return aom_highbd_12_obmc_variance##W##x##H##_c(pred8, W, wsrc, mask, sse); \
  }
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224