variance.c 52 KB
Newer Older
Johann's avatar
Johann committed
1
2
3
4
5
6
7
8
9
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
#include <stdlib.h>
Johann's avatar
Johann committed
11

Yaowu Xu's avatar
Yaowu Xu committed
12
13
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
Johann's avatar
Johann committed
14

15
#include "aom_ports/mem.h"
Yaowu Xu's avatar
Yaowu Xu committed
16
#include "aom/aom_integer.h"
Johann's avatar
Johann committed
17

18
#include "aom_dsp/variance.h"
Yaowu Xu's avatar
Yaowu Xu committed
19
#include "aom_dsp/aom_filter.h"
Johann's avatar
Johann committed
20

Yaowu Xu's avatar
Yaowu Xu committed
21
// Returns the sum of squared differences between two 4x4 blocks of 8-bit
// samples. a_stride and b_stride are the number of elements between the start
// of one row and the start of the next in a and b respectively.
uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    // Advance both blocks to their next row.
    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

Yaowu Xu's avatar
Yaowu Xu committed
39
// Returns the sum of the squares of the first 256 int16_t values in a.
uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    // Each product is formed in int (at most 2^30) before being accumulated
    // into the unsigned total.
    sum += a[i] * a[i];
  }

  return sum;
}

Yaowu Xu's avatar
Yaowu Xu committed
49
// Thin wrapper: forwards to aom_sub_pixel_variance16x16_c with xoffset = 4 and
// yoffset = 0, and returns its result. *sse is written by the callee.
// NOTE(review): offset 4 presumably selects the half-pixel bilinear filter --
// confirm against bilinear_filters_2t.
uint32_t aom_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}

Yaowu Xu's avatar
Yaowu Xu committed
55
// Thin wrapper: forwards to aom_sub_pixel_variance16x16_c with xoffset = 0 and
// yoffset = 4, and returns its result. *sse is written by the callee.
// NOTE(review): offset 4 presumably selects the half-pixel bilinear filter --
// confirm against bilinear_filters_2t.
uint32_t aom_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}

Yaowu Xu's avatar
Yaowu Xu committed
61
// Thin wrapper: forwards to aom_sub_pixel_variance16x16_c with xoffset = 4 and
// yoffset = 4, and returns its result. *sse is written by the callee.
// NOTE(review): offset 4 presumably selects the half-pixel bilinear filter --
// confirm against bilinear_filters_2t.
uint32_t aom_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}

clang-format's avatar
clang-format committed
67
68
// Accumulates the difference statistics of two w x h blocks of 8-bit samples.
// On return *sum holds the sum of (a - b) over the block and *sse holds the sum
// of (a - b)^2. Both outputs are reset to zero before accumulation. a_stride
// and b_stride are the number of elements between consecutive row starts.
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    // Advance both blocks to their next row.
    a += a_stride;
    b += b_stride;
  }
}

Johann's avatar
Johann committed
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// the first-pass of 2-D separable filter.
//
// Produces int16_t output to retain precision for the next pass. Two filter
// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
// It defines the offset required to move from one input to the next.
static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
104
105
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
Johann's avatar
Johann committed
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// the second-pass of 2-D separable filter.
//
// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one input
// to the next. Output is 8-bit.
static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
                                               const uint8_t *filter) {
clang-format's avatar
clang-format committed
130
  unsigned int i, j;
Johann's avatar
Johann committed
131
132
133

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
134
135
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
Johann's avatar
Johann committed
136
137
138
139
140
141
142
143
      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

clang-format's avatar
clang-format committed
144
/* Defines aom_variance<W>x<H>_c. The generated function stores the sum of
 * squared differences of the two W x H blocks in *sse and returns
 * sse - sum^2 / (W * H), with the sum^2 term computed in 64 bits.
 */
#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (((int64_t)sum * sum) / (W * H));                  \
  }
Johann's avatar
Johann committed
152

clang-format's avatar
clang-format committed
153
/* Defines aom_sub_pixel_variance<W>x<H>_c. The generated function filters
 * H + 1 rows of a horizontally (pixel_step = 1) with
 * bilinear_filters_2t[xoffset], then filters that result vertically
 * (pixel_step = W) with bilinear_filters_2t[yoffset] into an 8-bit W x H block,
 * and returns aom_variance<W>x<H>_c of that block against b.
 */
#define SUBPIX_VAR(W, H)                                                \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                         \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);       \
  }
Johann's avatar
Johann committed
167

clang-format's avatar
clang-format committed
168
/* Defines aom_sub_pixel_avg_variance<W>x<H>_c. The generated function runs the
 * same two bilinear filter passes as the sub-pixel variance (horizontal with
 * bilinear_filters_2t[xoffset], then vertical with
 * bilinear_filters_2t[yoffset]), combines the filtered block with second_pred
 * through aom_comp_avg_pred, and returns aom_variance<W>x<H>_c of the combined
 * block against b.
 */
#define SUBPIX_AVG_VAR(W, H)                                            \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                     \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                         \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);              \
                                                                        \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);       \
  }
Johann's avatar
Johann committed
186

Johann's avatar
Johann committed
187
188
189
190
/* Identical to the variance call except it takes an additional parameter, sum,
 * and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / w*h
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }
Johann's avatar
Johann committed
197
198
199
200
201

/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / w*h and returns sse in addition to modifying the passed in
 * variable.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }
Johann's avatar
Johann committed
210

Johann's avatar
Johann committed
211
212
/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)
Johann's avatar
Johann committed
216

Yaowu Xu's avatar
Yaowu Xu committed
217
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
218
219
220
VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
Yaowu Xu's avatar
Yaowu Xu committed
221
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
Johann's avatar
Johann committed
222
223
224
225
226
227
228
229
230
231
232
233
234
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)
Johann's avatar
Johann committed
235
236
237
238
239
240
241
242
243

GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

Yaowu Xu's avatar
Yaowu Xu committed
244
// Combines two 8-bit blocks sample by sample: each output sample is
// ROUND_POWER_OF_TWO(pred + ref, 1). comp_pred and pred are both traversed
// with a row pitch of width; ref is traversed with a row pitch of ref_stride.
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

259
// Get pred block from up-sampled reference.
//
// Copies every 8th sample of ref into comp_pred: output sample (i, j) is read
// from ref[i * (ref_stride * 8) + j * 8]. comp_pred is written densely with a
// row pitch of width.
void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height,
                          const uint8_t *ref, int ref_stride) {
  int i, j, k;
  int stride = ref_stride << 3;  // Row pitch, 8 reference rows per output row.

  for (i = 0; i < height; i++) {
    for (j = 0, k = 0; j < width; j++, k += 8) {
      comp_pred[j] = ref[k];
    }
    comp_pred += width;
    ref += stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
274
// Combines pred with every 8th sample of ref: output sample (i, j) is
// ROUND_POWER_OF_TWO(ref[i * (ref_stride * 8) + j * 8] + pred[i * width + j],
// 1). comp_pred and pred are both traversed with a row pitch of width.
void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                   int width, int height, const uint8_t *ref,
                                   int ref_stride) {
  int i, j;
  int stride = ref_stride << 3;  // Row pitch, 8 reference rows per output row.

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      const int tmp = ref[(j << 3)] + pred[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
291
#if CONFIG_AOM_HIGHBITDEPTH
clang-format's avatar
clang-format committed
292
293
294
// Accumulates the difference statistics of two w x h blocks of 16-bit samples
// using 64-bit accumulators. a8 and b8 are converted to uint16_t pointers with
// CONVERT_TO_SHORTPTR. On return *sum holds the sum of (a - b) and *sse holds
// the sum of (a - b)^2; both are reset to zero before accumulation. a_stride
// and b_stride are the number of 16-bit elements between consecutive row
// starts.
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      // Square in 64 bits: the difference of two uint16_t values can reach
      // +/-65535, whose square does not fit in a 32-bit int.
      *sse += (uint64_t)((int64_t)diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
}

clang-format's avatar
clang-format committed
313
314
315
// Computes the difference statistics of two w x h high-bitdepth blocks via
// highbd_variance64 and stores them, unscaled, narrowed to 32 bits:
// *sse = (uint32_t)sse and *sum = (int)sum.
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

clang-format's avatar
clang-format committed
323
324
325
// Computes the difference statistics of two w x h high-bitdepth blocks via
// highbd_variance64, then scales them down before narrowing to 32 bits:
// *sse = ROUND_POWER_OF_TWO(sse, 4) and *sum = ROUND_POWER_OF_TWO(sum, 2).
// NOTE(review): the shifts presumably bring 10-bit statistics to the 8-bit
// scale (2 extra bits per sample, 4 per squared sample) -- confirm.
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

clang-format's avatar
clang-format committed
333
334
335
// Computes the difference statistics of two w x h high-bitdepth blocks via
// highbd_variance64, then scales them down before narrowing to 32 bits:
// *sse = ROUND_POWER_OF_TWO(sse, 8) and *sum = ROUND_POWER_OF_TWO(sum, 4).
// NOTE(review): the shifts presumably bring 12-bit statistics to the 8-bit
// scale (4 extra bits per sample, 8 per squared sample) -- confirm.
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}

clang-format's avatar
clang-format committed
343
/* Defines aom_highbd_{8,10,12}_variance<W>x<H>_c. Each generated function
 * stores the (scaled) sum of squared differences in *sse and returns
 * sse - sum^2 / (W * H). The 10- and 12-bit variants compute the result in
 * 64 bits and clamp a negative value to 0; the 8-bit variant does not clamp.
 */
#define HIGHBD_VAR(W, H)                                                       \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
                                              const uint8_t *b, int b_stride,  \
                                              uint32_t *sse) {                 \
    int sum;                                                                   \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);              \
    return *sse - (((int64_t)sum * sum) / (W * H));                            \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }
Johann's avatar
Johann committed
371

clang-format's avatar
clang-format committed
372
/* Defines aom_highbd_{8,10,12}_get<S>x<S>var_c. Each generated function
 * forwards to the matching highbd_*_variance helper for an S x S block and
 * hands back both sse and sum through the caller's pointers.
 */
#define HIGHBD_GET_VAR(S)                                                    \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }
Johann's avatar
Johann committed
390

clang-format's avatar
clang-format committed
391
/* Defines aom_highbd_{8,10,12}_mse<W>x<H>_c. Each generated function calls the
 * matching highbd_*_variance helper for a W x H block, stores sse through the
 * caller's pointer and also returns it; the sum is computed but discarded.
 */
#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }
Johann's avatar
Johann committed
415

Yaowu Xu's avatar
Yaowu Xu committed
416
void aom_highbd_var_filter_block2d_bil_first_pass(
clang-format's avatar
clang-format committed
417
418
419
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
Johann's avatar
Johann committed
420
421
422
423
424
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
425
426
427
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
Johann's avatar
Johann committed
428
429
430
431
432
433
434
435
436
437

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
438
void aom_highbd_var_filter_block2d_bil_second_pass(
clang-format's avatar
clang-format committed
439
440
441
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
Johann's avatar
Johann committed
442
    const uint8_t *filter) {
clang-format's avatar
clang-format committed
443
  unsigned int i, j;
Johann's avatar
Johann committed
444
445
446

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
447
448
449
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
Johann's avatar
Johann committed
450
451
452
453
454
455
456
457
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

clang-format's avatar
clang-format committed
458
/* Defines aom_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c. Each generated
 * function filters H + 1 rows of src horizontally (pixel_step = 1) with
 * bilinear_filters_2t[xoffset], filters that result vertically
 * (pixel_step = W) with bilinear_filters_2t[yoffset] into a 16-bit W x H
 * block, and returns the matching aom_highbd_*_variance<W>x<H>_c of that block
 * (passed through CONVERT_TO_BYTEPTR) against dst.
 */
#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);            \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }
Johann's avatar
Johann committed
503

clang-format's avatar
clang-format committed
504
/*
 * Defines the C high-bitdepth sub-pixel "average" variance functions for a
 * W x H block, one each for the 8-, 10- and 12-bit name variants.
 *
 * Every generated function performs the same four steps:
 *   1. first bilinear pass over src using the xoffset kernel, producing
 *      H + 1 rows of W samples (pass1);
 *   2. second bilinear pass over pass1 using the yoffset kernel, producing
 *      H rows of W samples (pass2);
 *   3. aom_highbd_comp_avg_pred_c() combines pass2 with second_pred into
 *      blend;
 *   4. the matching aom_highbd_{8,10,12}_variance W x H function is called
 *      on blend (stride W) against dst, forwarding sse, and its result is
 *      returned.
 */
#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t pass1[(H + 1) * W];                                             \
    uint16_t pass2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, blend[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                             \
    aom_highbd_comp_avg_pred_c(blend, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(pass2), W);                \
                                                                             \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(blend), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t pass1[(H + 1) * W];                                             \
    uint16_t pass2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, blend[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                             \
    aom_highbd_comp_avg_pred_c(blend, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(pass2), W);                \
                                                                             \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(blend), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t pass1[(H + 1) * W];                                             \
    uint16_t pass2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, blend[H * W]);                             \
                                                                             \
    aom_highbd_var_filter_block2d_bil_first_pass(                            \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                           \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                             \
    aom_highbd_comp_avg_pred_c(blend, second_pred, W, H,                     \
                               CONVERT_TO_BYTEPTR(pass2), W);                \
                                                                             \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(blend), W, \
                                               dst, dst_stride, sse);        \
  }
Johann's avatar
Johann committed
564
565
566

/*
 * Expands to the complete high-bitdepth variance family for one W x H block
 * size: the definitions produced by HIGHBD_VAR, HIGHBD_SUBPIX_VAR and
 * HIGHBD_SUBPIX_AVG_VAR, so all three forms exist for the same sizes.
 */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)
Johann's avatar
Johann committed
570

Yaowu Xu's avatar
Yaowu Xu committed
571
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
572
573
574
/* Block sizes with a 128-pixel dimension; built only when the enclosing
 * CONFIG_AV1 && CONFIG_EXT_PARTITION guard is true. */
HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
Yaowu Xu's avatar
Yaowu Xu committed
575
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
Johann's avatar
Johann committed
576
577
578
579
580
581
582
583
584
585
586
587
588
589
/*
 * High-bitdepth variance family for the block sizes from 64x64 down to 4x4.
 * Each line expands HIGHBD_VAR, HIGHBD_SUBPIX_VAR and HIGHBD_SUBPIX_AVG_VAR
 * for the given W x H.
 */
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)

Johann's avatar
Johann committed
590
591
592
593
594
595
596
597
/*
 * NOTE(review): HIGHBD_GET_VAR and HIGHBD_MSE are defined earlier in this
 * file; presumably they instantiate the high-bitdepth get-var helpers for a
 * square size and the MSE functions for a W x H size -- confirm against the
 * macro definitions.
 */
HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

Yaowu Xu's avatar
Yaowu Xu committed
598
/*
 * Builds a compound prediction by averaging two high-bitdepth blocks.
 *
 * comp_pred  output block, written with a row pitch of `width`.
 * pred8      first input, converted with CONVERT_TO_SHORTPTR and read with a
 *            row pitch of `width`.
 * width      number of samples per row.
 * height     number of rows.
 * ref8       second input, converted with CONVERT_TO_SHORTPTR and read with
 *            a row pitch of `ref_stride`.
 * ref_stride row pitch of the second input, in samples.
 *
 * Each output sample is ROUND_POWER_OF_TWO(pred + ref, 1).
 */
void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + ref_row[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
    /* Output and first input are packed; only the reference has a stride. */
    comp_pred += width;
    pred_row += width;
    ref_row += ref_stride;
  }
}
614

Yaowu Xu's avatar
Yaowu Xu committed
615
/*
 * Copies every 8th sample of a high-bitdepth reference into a packed block.
 *
 * comp_pred  output block, written with a row pitch of `width`.
 * width      number of output samples per row.
 * height     number of output rows.
 * ref8       reference, converted with CONVERT_TO_SHORTPTR.
 * ref_stride reference row pitch; consecutive output rows are 8 reference
 *            rows apart (ref_stride << 3).
 *
 * NOTE(review): the factor of 8 in both directions suggests the reference is
 * an 8x upsampled frame -- confirm against the callers.
 */
void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height,
                                 const uint8_t *ref8, int ref_stride) {
  const int row_step = ref_stride << 3;
  const uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      comp_pred[col] = ref_row[col << 3];
    }
    comp_pred += width;
    ref_row += row_step;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
630
/*
 * Averages a packed high-bitdepth prediction with every 8th sample of a
 * high-bitdepth reference.
 *
 * comp_pred  output block, written with a row pitch of `width`.
 * pred8      packed prediction (row pitch `width`), converted with
 *            CONVERT_TO_SHORTPTR.
 * width      number of output samples per row.
 * height     number of output rows.
 * ref8       reference, converted with CONVERT_TO_SHORTPTR; sample col is
 *            read at index col << 3.
 * ref_stride reference row pitch; consecutive output rows are 8 reference
 *            rows apart (ref_stride << 3).
 *
 * Each output sample is ROUND_POWER_OF_TWO(pred + ref, 1).
 */
void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
                                          const uint8_t *pred8, int width,
                                          int height, const uint8_t *ref8,
                                          int ref_stride) {
  const int row_step = ref_stride << 3;
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + ref_row[col << 3];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
    comp_pred += width;
    pred_row += width;
    ref_row += row_step;
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
649
#endif  // CONFIG_AOM_HIGHBITDEPTH
650

Yaowu Xu's avatar
Yaowu Xu committed
651
#if CONFIG_AV1 && CONFIG_EXT_INTER
clang-format's avatar
clang-format committed
652
653
654
/*
 * Accumulates mask-weighted difference statistics between two 8-bit blocks.
 *
 * For every sample of the w x h block the difference (a - b) is multiplied
 * by the co-located mask value in m.  On return:
 *   *sum  holds the absolute value of the summed weighted differences,
 *         rounded down by 6 bits;
 *   *sse  holds the summed squared weighted differences, rounded down by
 *         12 bits.
 * a, b and m are advanced by a_stride, b_stride and m_stride after each row.
 *
 * NOTE(review): the 6/12-bit scaling suggests mask weights are expected to be
 * at most 64 -- confirm against the callers.
 */
void masked_variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, const uint8_t *m, int m_stride, int w, int h,
                     unsigned int *sse, int *sum) {
  int i, j;

  int64_t sum64 = 0;
  uint64_t sse64 = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      const int diff = (a[j] - b[j]) * (m[j]);
      sum64 += diff;
      /* Square in 64 bits: for an arbitrary uint8_t mask |diff| can reach
       * 255 * 255, whose square does not fit in a 32-bit int.  This matches
       * the widening done in highbd_masked_variance64(). */
      sse64 += (int64_t)diff * diff;
    }

    a += a_stride;
    b += b_stride;
    m += m_stride;
  }
  /* Only the magnitude of the sum is reported. */
  sum64 = (sum64 >= 0) ? sum64 : -sum64;
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 6);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 12);
}

clang-format's avatar
clang-format committed
676
/*
 * Defines aom_masked_variance<W>x<H>_c(): runs masked_variance() over a
 * W x H block (which fills *sse and a local sum) and returns
 * *sse - sum * sum / (W * H), with the square taken in 64 bits.
 */
#define MASK_VAR(W, H)                                                       \
  unsigned int aom_masked_variance##W##x##H##_c(                             \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    int64_t sum_sq;                                                          \
    masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
    sum_sq = (int64_t)sum * sum;                                             \
    return *sse - (sum_sq / (W * H));                                        \
  }
684

clang-format's avatar
clang-format committed
685
/*
 * Defines aom_masked_sub_pixel_variance<W>x<H>_c().
 *
 * The generated function runs the first bilinear pass over src with the
 * xoffset kernel (H + 1 rows of W samples into pass1), the second pass with
 * the yoffset kernel (H rows of W samples into pass2), and then returns
 * aom_masked_variance<W>x<H>_c() of pass2 (stride W) against dst under the
 * mask msk, forwarding sse.
 */
#define MASK_SUBPIX_VAR(W, H)                                                 \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint8_t pass2[H * W];                                                     \
                                                                              \
    var_filter_block2d_bil_first_pass(src, pass1, src_stride, 1, H + 1, W,    \
                                      bilinear_filters_2t[xoffset]);          \
    var_filter_block2d_bil_second_pass(pass1, pass2, W, W, H, W,              \
                                       bilinear_filters_2t[yoffset]);         \
                                                                              \
    return aom_masked_variance##W##x##H##_c(pass2, W, dst, dst_stride, msk,   \
                                            msk_stride, sse);                 \
  }
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751

/*
 * Instantiate the masked variance (MASK_VAR) and masked sub-pixel variance
 * (MASK_SUBPIX_VAR) C functions for each W x H block size listed below.
 */
MASK_VAR(4, 4)
MASK_SUBPIX_VAR(4, 4)

MASK_VAR(4, 8)
MASK_SUBPIX_VAR(4, 8)

MASK_VAR(8, 4)
MASK_SUBPIX_VAR(8, 4)

MASK_VAR(8, 8)
MASK_SUBPIX_VAR(8, 8)

MASK_VAR(8, 16)
MASK_SUBPIX_VAR(8, 16)

MASK_VAR(16, 8)
MASK_SUBPIX_VAR(16, 8)

MASK_VAR(16, 16)
MASK_SUBPIX_VAR(16, 16)

MASK_VAR(16, 32)
MASK_SUBPIX_VAR(16, 32)

MASK_VAR(32, 16)
MASK_SUBPIX_VAR(32, 16)

MASK_VAR(32, 32)
MASK_SUBPIX_VAR(32, 32)

MASK_VAR(32, 64)
MASK_SUBPIX_VAR(32, 64)

MASK_VAR(64, 32)
MASK_SUBPIX_VAR(64, 32)

MASK_VAR(64, 64)
MASK_SUBPIX_VAR(64, 64)

/* Sizes with a 128-pixel dimension are built only with CONFIG_EXT_PARTITION. */
#if CONFIG_EXT_PARTITION
MASK_VAR(64, 128)
MASK_SUBPIX_VAR(64, 128)

MASK_VAR(128, 64)
MASK_SUBPIX_VAR(128, 64)

MASK_VAR(128, 128)
MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

Yaowu Xu's avatar
Yaowu Xu committed
752
#if CONFIG_AOM_HIGHBITDEPTH
clang-format's avatar
clang-format committed
753
754
755
756
/*
 * Accumulates mask-weighted difference statistics between two high-bitdepth
 * blocks using 64-bit totals.
 *
 * a8 and b8 are converted with CONVERT_TO_SHORTPTR and read as 16-bit
 * samples; m is read as 8-bit mask values.  For every sample of the w x h
 * block the difference (a - b) is multiplied by the co-located mask value.
 * On return:
 *   *sum  holds the absolute value of the summed weighted differences,
 *         rounded down by 6 bits;
 *   *sse  holds the summed squared weighted differences, rounded down by
 *         12 bits.
 */
void highbd_masked_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, const uint8_t *m,
                              int m_stride, int w, int h, uint64_t *sse,
                              int64_t *sum) {
  const uint16_t *a_row = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b_row = CONVERT_TO_SHORTPTR(b8);
  int64_t total = 0;
  uint64_t sq_total = 0;
  int row, col;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int diff = (a_row[col] - b_row[col]) * (m[col]);
      total += (int64_t)diff;
      sq_total += (int64_t)diff * diff;
    }

    a_row += a_stride;
    b_row += b_stride;
    m += m_stride;
  }

  /* Only the magnitude of the sum is reported. */
  if (total < 0) total = -total;

  *sum = ROUND_POWER_OF_TWO(total, 6);
  *sse = ROUND_POWER_OF_TWO(sq_total, 12);
}

clang-format's avatar
clang-format committed
780
781
782
/*
 * 32-bit front end for highbd_masked_variance64(): forwards all arguments
 * and narrows the 64-bit totals into *sum and *sse with no extra scaling.
 */
void highbd_masked_variance(const uint8_t *a8, int a_stride, const uint8_t *b8,
                            int b_stride, const uint8_t *m, int m_stride, int w,
                            int h, unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &wide_sse, &wide_sum);

  *sum = (int)wide_sum;
  *sse = (unsigned int)wide_sse;
}

clang-format's avatar
clang-format committed
791
792
793
/*
 * 10-bit front end for highbd_masked_variance64(): forwards all arguments,
 * then rounds the 64-bit sum down by a further 2 bits and the 64-bit sse by
 * a further 4 bits before narrowing them into *sum and *sse.
 */
void highbd_10_masked_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               const uint8_t *m, int m_stride, int w, int h,
                               unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &wide_sse, &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 4);
}

clang-format's avatar
clang-format committed
803
804
805
/*
 * 12-bit front end for highbd_masked_variance64(): forwards all arguments,
 * then rounds the 64-bit sum down by a further 4 bits and the 64-bit sse by
 * a further 8 bits before narrowing them into *sum and *sse.
 */
void highbd_12_masked_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               const uint8_t *m, int m_stride, int w, int h,
                               unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &wide_sse, &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 8);
}

clang-format's avatar
clang-format committed
815
/*
 * Defines the three high-bitdepth masked variance functions for a W x H
 * block: aom_highbd_masked_variance, aom_highbd_10_masked_variance and
 * aom_highbd_12_masked_variance (each suffixed <W>x<H>_c).
 *
 * Each one calls the matching highbd_*masked_variance() helper, which fills
 * *sse and a local sum, and returns *sse - sum * sum / (W * H) with the
 * square taken in 64 bits.
 */
#define HIGHBD_MASK_VAR(W, H)                                                \
  unsigned int aom_highbd_masked_variance##W##x##H##_c(                      \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    int64_t sum_sq;                                                          \
    highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
                           &sum);                                            \
    sum_sq = (int64_t)sum * sum;                                             \
    return *sse - (sum_sq / (W * H));                                        \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_10_masked_variance##W##x##H##_c(                   \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    int64_t sum_sq;                                                          \
    highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,   \
                              sse, &sum);                                    \
    sum_sq = (int64_t)sum * sum;                                             \
    return *sse - (sum_sq / (W * H));                                        \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_12_masked_variance##W##x##H##_c(                   \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    int64_t sum_sq;                                                          \
    highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,   \
                              sse, &sum);                                    \
    sum_sq = (int64_t)sum * sum;                                             \
    return *sse - (sum_sq / (W * H));                                        \
  }
842

clang-format's avatar
clang-format committed
843
/*
 * Defines the three high-bitdepth masked sub-pixel variance functions for a
 * W x H block (plain, 10-bit and 12-bit name variants).
 *
 * Each generated function runs the first high-bitdepth bilinear pass over
 * src with the xoffset kernel (H + 1 rows of W samples into pass1), the
 * second pass with the yoffset kernel (H rows of W samples into pass2), and
 * returns the matching aom_highbd_*masked_variance<W>x<H>_c() of pass2
 * (stride W) against dst under the mask msk, forwarding sse.
 */
#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                          \
  unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    return aom_highbd_masked_variance##W##x##H##_c(                           \
        CONVERT_TO_BYTEPTR(pass2), W, dst, dst_stride, msk, msk_stride, sse); \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    return aom_highbd_10_masked_variance##W##x##H##_c(                        \
        CONVERT_TO_BYTEPTR(pass2), W, dst, dst_stride, msk, msk_stride, sse); \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    return aom_highbd_12_masked_variance##W##x##H##_c(                        \
        CONVERT_TO_BYTEPTR(pass2), W, dst, dst_stride, msk, msk_stride, sse); \
  }
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940

/*
 * Instantiate the high-bitdepth masked variance (HIGHBD_MASK_VAR) and masked
 * sub-pixel variance (HIGHBD_MASK_SUBPIX_VAR) C functions for each W x H
 * block size listed below.
 */
HIGHBD_MASK_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 4)

HIGHBD_MASK_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(4, 8)

HIGHBD_MASK_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 4)

HIGHBD_MASK_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 8)

HIGHBD_MASK_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(8, 16)

HIGHBD_MASK_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 8)

HIGHBD_MASK_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 16)

HIGHBD_MASK_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(16, 32)

HIGHBD_MASK_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 16)

HIGHBD_MASK_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 32)

HIGHBD_MASK_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(32, 64)

HIGHBD_MASK_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 32)

HIGHBD_MASK_VAR(64, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 64)

/* Sizes with a 128-pixel dimension are built only with CONFIG_EXT_PARTITION. */
#if CONFIG_EXT_PARTITION
HIGHBD_MASK_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(64, 128)

HIGHBD_MASK_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 64)

HIGHBD_MASK_VAR(128, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
Yaowu Xu's avatar
Yaowu Xu committed
941
942
#endif  // CONFIG_AOM_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_EXT_INTER
943

Yaowu Xu's avatar
Yaowu Xu committed
944
#if CONFIG_AV1 && CONFIG_OBMC
clang-format's avatar
clang-format committed
945
/*
 * Accumulates OBMC difference statistics for an 8-bit w x h block.
 *
 * For each sample the value wsrc - pre * mask is rounded down by 12 bits
 * with ROUND_POWER_OF_TWO_SIGNED; *sum receives the total of those values
 * and *sse the total of their squares.  pre advances by pre_stride per row,
 * while wsrc and mask are packed with a row pitch of w.
 */
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  unsigned int sq_total = 0;
  int total = 0;
  int row, col;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int diff =
          ROUND_POWER_OF_TWO_SIGNED(wsrc[col] - pre[col] * mask[col], 12);
      total += diff;
      sq_total += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }

  *sse = sq_total;
  *sum = total;
}

clang-format's avatar
clang-format committed
966
/*
 * Defines aom_obmc_variance<W>x<H>_c(): runs obmc_variance() over a W x H
 * block (which fills *sse and a local sum) and returns
 * *sse - sum * sum / (W * H), with the square taken in 64 bits.
 */
#define OBMC_VAR(W, H)                                           \
  unsigned int aom_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,   \
      const int32_t *mask, unsigned int *sse) {                  \
    int sum;                                                     \
    int64_t sum_sq;                                              \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    sum_sq = (int64_t)sum * sum;                                 \
    return *sse - (sum_sq / (W * H));                            \
  }
974

clang-format's avatar
clang-format committed
975
/*
 * Defines aom_obmc_sub_pixel_variance<W>x<H>_c().
 *
 * The generated function runs the first bilinear pass over pre with the
 * xoffset kernel (H + 1 rows of W samples into pass1), the second pass with
 * the yoffset kernel (H rows of W samples into pass2), and returns
 * aom_obmc_variance<W>x<H>_c() of pass2 (stride W) against wsrc and mask,
 * forwarding sse.
 */
#define OBMC_SUBPIX_VAR(W, H)                                               \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,         \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {        \
    uint16_t pass1[(H + 1) * W];                                            \
    uint8_t pass2[H * W];                                                   \
                                                                            \
    var_filter_block2d_bil_first_pass(pre, pass1, pre_stride, 1, H + 1, W,  \
                                      bilinear_filters_2t[xoffset]);        \
    var_filter_block2d_bil_second_pass(pass1, pass2, W, W, H, W,            \
                                       bilinear_filters_2t[yoffset]);       \
                                                                            \
    return aom_obmc_variance##W##x##H##_c(pass2, W, wsrc, mask, sse);       \
  }
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039

/*
 * Instantiate the OBMC variance (OBMC_VAR) and OBMC sub-pixel variance
 * (OBMC_SUBPIX_VAR) C functions for each W x H block size listed below.
 */
OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

/* Sizes with a 128-pixel dimension are built only with CONFIG_EXT_PARTITION. */
#if CONFIG_EXT_PARTITION
OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

Yaowu Xu's avatar
Yaowu Xu committed
1040
#if CONFIG_AOM_HIGHBITDEPTH
1041
1042
/*
 * Accumulates OBMC difference statistics for a high-bitdepth w x h block
 * using 64-bit totals.
 *
 * pre8 is converted with CONVERT_TO_SHORTPTR and read as 16-bit samples.
 * For each sample the value wsrc - pre * mask is rounded down by 12 bits
 * with ROUND_POWER_OF_TWO_SIGNED; *sum receives the total of those values
 * and *sse the total of their squares.  pre advances by pre_stride per row,
 * while wsrc and mask are packed with a row pitch of w.
 */
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  int i, j;
  uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      /* Square in 64 bits so the product cannot overflow int before it is
       * added to the 64-bit total, matching highbd_masked_variance64(). */
      *sse += (int64_t)diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

1064
1065
/*
 * 32-bit front end for highbd_obmc_variance64(): forwards all arguments and
 * narrows the 64-bit totals into *sum and *sse with no extra scaling.
 */
static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);

  *sum = (int)wide_sum;
  *sse = (unsigned int)wide_sse;
}

clang-format's avatar
clang-format committed
1075
/*
 * 10-bit front end for highbd_obmc_variance64(): forwards all arguments,
 * then rounds the 64-bit sum down by 2 bits and the 64-bit sse by 4 bits
 * before narrowing them into *sum and *sse.
 */
static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 4);
}

clang-format's avatar
clang-format committed
1086
/*
 * 12-bit front end for highbd_obmc_variance64(): forwards all arguments,
 * then rounds the 64-bit sum down by 4 bits and the 64-bit sse by 8 bits
 * before narrowing them into *sum and *sse.
 */
static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 8);
}

clang-format's avatar
clang-format committed
1097
// Defines the three C reference OBMC variance functions for a W x H block:
//   aom_highbd_obmc_variance<W>x<H>_c
//   aom_highbd_10_obmc_variance<W>x<H>_c
//   aom_highbd_12_obmc_variance<W>x<H>_c
// Each one stores the SSE in *sse and returns sse - sum^2 / (W * H).
//
// The 10- and 12-bit helpers round the sum and the SSE independently, so the
// difference can come out slightly negative. Those two variants therefore
// compute it in int64_t and clamp at zero instead of letting the negative
// value wrap around to a huge unsigned result.
#define HIGHBD_OBMC_VAR(W, H)                                              \
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);    \
    return *sse - (((int64_t)sum * sum) / (W * H));                        \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (unsigned int)var : 0;                             \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (unsigned int)var : 0;                             \
  }
1121

clang-format's avatar
clang-format committed
1122
// Defines the three C reference OBMC sub-pixel variance functions for a
// W x H block (plain, 10-bit and 12-bit name variants). Each one:
//   1. runs the bilinear first pass over H + 1 rows of `pre` using the
//      filter selected by xoffset, writing into a (H + 1) * W scratch buffer;
//   2. runs the bilinear second pass over that buffer using the filter
//      selected by yoffset, producing an H * W block;
//   3. returns the matching aom_highbd*_obmc_variance<W>x<H>_c() of that
//      block (stride W) against wsrc/mask, which also fills in *sse.
#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                           \
  unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c(                \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t hpass[(H + 1) * W];                                               \
    uint16_t vpass[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, hpass, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);    \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        hpass, vpass, W, W, H, W, bilinear_filters_2t[yoffset]);               \
                                                                               \
    return aom_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(vpass), W, \
                                                 wsrc, mask, sse);             \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t hpass[(H + 1) * W];                                               \
    uint16_t vpass[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, hpass, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);    \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        hpass, vpass, W, W, H, W, bilinear_filters_2t[yoffset]);               \
                                                                               \
    return aom_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(vpass), \
                                                    W, wsrc, mask, sse);       \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t hpass[(H + 1) * W];                                               \
    uint16_t vpass[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, hpass, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);    \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        hpass, vpass, W, W, H, W, bilinear_filters_2t[yoffset]);               \
                                                                               \
    return aom_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(vpass), \
                                                    W, wsrc, mask, sse);       \
  }
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216

// Instantiate the OBMC variance and sub-pixel variance functions for every
// supported W x H block size. Each HIGHBD_OBMC_VAR / HIGHBD_OBMC_SUBPIX_VAR
// pair expands to the plain, 10-bit and 12-bit `_c` functions for that size.
// The sizes with a 128-pixel dimension are only built when
// CONFIG_EXT_PARTITION is enabled.
HIGHBD_OBMC_VAR(4, 4)
HIGHBD_OBMC_SUBPIX_VAR(4, 4)

HIGHBD_OBMC_VAR(4, 8)
HIGHBD_OBMC_SUBPIX_VAR(4, 8)

HIGHBD_OBMC_VAR(8, 4)
HIGHBD_OBMC_SUBPIX_VAR(8, 4)

HIGHBD_OBMC_VAR(8, 8)
HIGHBD_OBMC_SUBPIX_VAR(8, 8)

HIGHBD_OBMC_VAR(8, 16)
HIGHBD_OBMC_SUBPIX_VAR(8, 16)

HIGHBD_OBMC_VAR(16, 8)
HIGHBD_OBMC_SUBPIX_VAR(16, 8)

HIGHBD_OBMC_VAR(16, 16)
HIGHBD_OBMC_SUBPIX_VAR(16, 16)

HIGHBD_OBMC_VAR(16, 32)
HIGHBD_OBMC_SUBPIX_VAR(16, 32)

HIGHBD_OBMC_VAR(32, 16)
HIGHBD_OBMC_SUBPIX_VAR(32, 16)

HIGHBD_OBMC_VAR(32, 32)
HIGHBD_OBMC_SUBPIX_VAR(32, 32)

HIGHBD_OBMC_VAR(32, 64)
HIGHBD_OBMC_SUBPIX_VAR(32, 64)

HIGHBD_OBMC_VAR(64, 32)
HIGHBD_OBMC_SUBPIX_VAR(64, 32)

HIGHBD_OBMC_VAR(64, 64)
HIGHBD_OBMC_SUBPIX_VAR(64, 64)

#if CONFIG_EXT_PARTITION
HIGHBD_OBMC_VAR(64, 128)
HIGHBD_OBMC_SUBPIX_VAR(64, 128)

HIGHBD_OBMC_VAR(128, 64)
HIGHBD_OBMC_SUBPIX_VAR(128, 64)

HIGHBD_OBMC_VAR(128, 128)
HIGHBD_OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
Yaowu Xu's avatar
Yaowu Xu committed
1217
1218
#endif  // CONFIG_AOM_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_OBMC