/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
#include <stdlib.h>
Johann's avatar
Johann committed
12

Yaowu Xu's avatar
Yaowu Xu committed
13
14
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
Johann's avatar
Johann committed
15

16
#include "aom_ports/mem.h"
Yaowu Xu's avatar
Yaowu Xu committed
17
#include "aom/aom_integer.h"
Johann's avatar
Johann committed
18

19
#include "aom_dsp/variance.h"
Yaowu Xu's avatar
Yaowu Xu committed
20
#include "aom_dsp/aom_filter.h"
Johann's avatar
Johann committed
21

Yaowu Xu's avatar
Yaowu Xu committed
22
// Returns the sum of squared differences between the 4x4 block at `a` and the
// 4x4 block at `b`.
//
// a_stride / b_stride: number of elements between the starts of consecutive
// rows of `a` and `b` respectively.
uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      // uint8_t operands promote to int, so the difference can be negative.
      const int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

Yaowu Xu's avatar
Yaowu Xu committed
40
// Returns the sum of squares of the first 256 int16_t values at `a`.
// The accumulator is an unsigned int, so the total wraps on overflow.
uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}

Yaowu Xu's avatar
Yaowu Xu committed
50
// 16x16 sub-pixel variance with a fixed horizontal offset: forwards to
// aom_sub_pixel_variance16x16_c with xoffset = 4 and yoffset = 0.
// NOTE(review): offset 4 is presumably the half-pixel position of the
// bilinear filter table; confirm against bilinear_filters_2t.
uint32_t aom_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}

Yaowu Xu's avatar
Yaowu Xu committed
56
// 16x16 sub-pixel variance with a fixed vertical offset: forwards to
// aom_sub_pixel_variance16x16_c with xoffset = 0 and yoffset = 4.
// NOTE(review): offset 4 is presumably the half-pixel position of the
// bilinear filter table; confirm against bilinear_filters_2t.
uint32_t aom_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}

Yaowu Xu's avatar
Yaowu Xu committed
62
// 16x16 sub-pixel variance with fixed horizontal and vertical offsets:
// forwards to aom_sub_pixel_variance16x16_c with xoffset = 4 and yoffset = 4.
// NOTE(review): offset 4 is presumably the half-pixel position of the
// bilinear filter table; confirm against bilinear_filters_2t.
uint32_t aom_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}

clang-format's avatar
clang-format committed
68
69
// Accumulates the difference statistics of two w x h blocks of 8-bit samples.
//
// a, b:               top-left samples of the two blocks.
// a_stride, b_stride: elements between the starts of consecutive rows.
// sse:                receives the sum of squared differences (a - b)^2.
// sum:                receives the signed sum of differences (a - b).
//
// Both outputs are reset to zero before accumulation.
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}

Johann's avatar
Johann committed
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// the first-pass of 2-D separable filter.
//
// Produces int16_t output to retain precision for the next pass. Two filter
// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
// It defines the offset required to move from one input to the next.
static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
105
106
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
Johann's avatar
Johann committed
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// the second-pass of 2-D separable filter.
//
// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one input
// to the next. Output is 8-bit.
static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
                                               const uint8_t *filter) {
clang-format's avatar
clang-format committed
131
  unsigned int i, j;
Johann's avatar
Johann committed
132
133
134

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
135
136
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
Johann's avatar
Johann committed
137
138
139
140
141
142
143
144
      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

clang-format's avatar
clang-format committed
145
/* Defines aom_variance<W>x<H>_c: stores the sum of squared differences of the
 * two W x H blocks in *sse and returns sse - sum^2 / (W * H). The square of
 * the sum is computed in 64 bits before the division.
 */
#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, (W), (H), sse, &sum);         \
    return *sse - (((int64_t)sum * sum) / ((W) * (H)));              \
  }
Johann's avatar
Johann committed
153

clang-format's avatar
clang-format committed
154
/* Defines aom_sub_pixel_variance<W>x<H>_c: filters block `a` with the 2-tap
 * filters bilinear_filters_2t[xoffset] (horizontal pass, H + 1 rows) and
 * bilinear_filters_2t[yoffset] (vertical pass, H rows), then returns the
 * W x H variance of the filtered block against `b`, storing the sum of squared
 * differences in *sse.
 */
#define SUBPIX_VAR(W, H)                                                  \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                           \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,           \
      const uint8_t *b, int b_stride, uint32_t *sse) {                    \
    uint16_t fdata3[((H) + 1) * (W)];                                     \
    uint8_t temp2[(H) * (W)];                                             \
                                                                          \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, (H) + 1, W, \
                                      bilinear_filters_2t[xoffset]);      \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,         \
                                       bilinear_filters_2t[yoffset]);     \
                                                                          \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);         \
  }
Johann's avatar
Johann committed
168

clang-format's avatar
clang-format committed
169
/* Defines aom_sub_pixel_avg_variance<W>x<H>_c: filters block `a` exactly as
 * the sub-pixel variance does, combines the filtered block with `second_pred`
 * through aom_comp_avg_pred, and returns the W x H variance of that combined
 * block against `b`, storing the sum of squared differences in *sse.
 */
#define SUBPIX_AVG_VAR(W, H)                                              \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                       \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,           \
      const uint8_t *b, int b_stride, uint32_t *sse,                      \
      const uint8_t *second_pred) {                                       \
    uint16_t fdata3[((H) + 1) * (W)];                                     \
    uint8_t temp2[(H) * (W)];                                             \
    DECLARE_ALIGNED(16, uint8_t, temp3[(H) * (W)]);                       \
                                                                          \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, (H) + 1, W, \
                                      bilinear_filters_2t[xoffset]);      \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,         \
                                       bilinear_filters_2t[yoffset]);     \
                                                                          \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);                \
                                                                          \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);         \
  }
Johann's avatar
Johann committed
187

Johann's avatar
Johann committed
188
189
190
191
/* Identical to the variance call except it takes an additional parameter, sum,
 * and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h).
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }
Johann's avatar
Johann committed
198
199
200
201
202

/* Identical to the variance call except it does not calculate
 * sse - sum^2 / (w * h); it returns sse in addition to storing it in the
 * passed-in variable.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }
Johann's avatar
Johann committed
211

Johann's avatar
Johann committed
212
213
/* All three forms of the variance are available in the same sizes:
 * VARIANCES(W, H) expands to the plain, sub-pixel and sub-pixel-average
 * variance functions for a W x H block.
 */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)
Johann's avatar
Johann committed
217

Yaowu Xu's avatar
Yaowu Xu committed
218
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
219
220
221
VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
Yaowu Xu's avatar
Yaowu Xu committed
222
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
Johann's avatar
Johann committed
223
224
225
226
227
228
229
230
231
232
233
234
235
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)
Johann's avatar
Johann committed
236
237
238
239
240
241
242
243
244

// aom_get16x16var_c and aom_get8x8var_c: report sse and sum separately.
GET_VAR(16, 16)
GET_VAR(8, 8)

// aom_mse16x16_c, aom_mse16x8_c, aom_mse8x16_c and aom_mse8x8_c.
MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

Yaowu Xu's avatar
Yaowu Xu committed
245
// Writes the rounded average of `pred` and `ref` into `comp_pred`.
//
// comp_pred:  output, width x height, rows stored contiguously (stride width).
// pred:       first input, same contiguous layout as comp_pred.
// ref:        second input, rows ref_stride elements apart.
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

260
// Get pred block from up-sampled reference.
//
// Copies every 8th sample of `ref` horizontally and every 8th row vertically
// into `comp_pred`, which is written as height rows of width contiguous
// samples. ref_stride is the row pitch of `ref` before the factor of 8.
void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height,
                          const uint8_t *ref, int ref_stride) {
  int i, j, k;
  // Multiply rather than shift: left-shifting a negative stride is undefined.
  const int stride = ref_stride * 8;

  for (i = 0; i < height; i++) {
    for (j = 0, k = 0; j < width; j++, k += 8) {
      comp_pred[j] = ref[k];
    }
    comp_pred += width;
    ref += stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
275
// Writes into `comp_pred` the rounded average of `pred` and the sub-sampled
// reference: every 8th sample of `ref` horizontally and every 8th row
// vertically. comp_pred and pred are both width x height with rows stored
// contiguously; ref_stride is the row pitch of `ref` before the factor of 8.
void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                   int width, int height, const uint8_t *ref,
                                   int ref_stride) {
  int i, j;
  // Multiply rather than shift: left-shifting a negative stride is undefined.
  const int stride = ref_stride * 8;

  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      const int tmp = ref[j * 8] + pred[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
292
#if CONFIG_AOM_HIGHBITDEPTH
clang-format's avatar
clang-format committed
293
294
295
// Accumulates the difference statistics of two w x h blocks of 16-bit samples
// using 64-bit accumulators.
//
// a8, b8:             block pointers in the byte-pointer form that
//                     CONVERT_TO_SHORTPTR turns back into uint16_t pointers.
// a_stride, b_stride: uint16_t elements between the starts of consecutive rows.
// sse:                receives the sum of squared differences.
// sum:                receives the signed sum of differences.
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      // Square in 64 bits: for full-range uint16_t samples the square of the
      // difference does not fit in a 32-bit int.
      *sse += (uint64_t)((int64_t)diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
}

clang-format's avatar
clang-format committed
314
315
316
// Computes sse and sum for two w x h high-bitdepth blocks via
// highbd_variance64 and narrows the 64-bit totals to 32 bits without scaling.
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

clang-format's avatar
clang-format committed
324
325
326
// Computes sse and sum for two w x h high-bitdepth blocks via
// highbd_variance64, then scales them down with rounding: sse by 4 bits and
// sum by 2 bits.
// NOTE(review): the shifts presumably normalise 10-bit statistics to the 8-bit
// range (2 extra bits per sample, 4 per squared sample); confirm with callers.
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

clang-format's avatar
clang-format committed
334
335
336
// Computes sse and sum for two w x h high-bitdepth blocks via
// highbd_variance64, then scales them down with rounding: sse by 8 bits and
// sum by 4 bits.
// NOTE(review): the shifts presumably normalise 12-bit statistics to the 8-bit
// range (4 extra bits per sample, 8 per squared sample); confirm with callers.
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}

clang-format's avatar
clang-format committed
344
/* Defines aom_highbd_{8,10,12}_variance<W>x<H>_c: each stores the (scaled) sum
 * of squared differences in *sse and returns sse - sum^2 / (W * H).
 *
 * The 10- and 12-bit variants compute the result in 64 bits and clamp it at
 * zero: their helpers round sse and sum independently, so the difference can
 * come out negative.
 */
#define HIGHBD_VAR(W, H)                                                       \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
                                              const uint8_t *b, int b_stride,  \
                                              uint32_t *sse) {                 \
    int sum;                                                                   \
    highbd_8_variance(a, a_stride, b, b_stride, (W), (H), sse, &sum);          \
    return *sse - (((int64_t)sum * sum) / ((W) * (H)));                        \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_10_variance(a, a_stride, b, b_stride, (W), (H), sse, &sum);         \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / ((W) * (H)));              \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_12_variance(a, a_stride, b, b_stride, (W), (H), sse, &sum);         \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / ((W) * (H)));              \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }
Johann's avatar
Johann committed
372

clang-format's avatar
clang-format committed
373
/* Defines aom_highbd_{8,10,12}_get<S>x<S>var_c for a square S x S block: each
 * forwards to the matching highbd_*_variance helper and reports sse and sum
 * through the output pointers instead of returning a variance.
 */
#define HIGHBD_GET_VAR(S)                                                    \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }
Johann's avatar
Johann committed
391

clang-format's avatar
clang-format committed
392
/* Defines aom_highbd_{8,10,12}_mse<W>x<H>_c: each calls the matching
 * highbd_*_variance helper, discards the sum, and returns the sse value that
 * it also stores in *sse.
 */
#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }
Johann's avatar
Johann committed
416

Yaowu Xu's avatar
Yaowu Xu committed
417
void aom_highbd_var_filter_block2d_bil_first_pass(
clang-format's avatar
clang-format committed
418
419
420
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
Johann's avatar
Johann committed
421
422
423
424
425
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
426
427
428
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
Johann's avatar
Johann committed
429
430
431
432
433
434
435
436
437
438

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
439
void aom_highbd_var_filter_block2d_bil_second_pass(
clang-format's avatar
clang-format committed
440
441
442
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
Johann's avatar
Johann committed
443
    const uint8_t *filter) {
clang-format's avatar
clang-format committed
444
  unsigned int i, j;
Johann's avatar
Johann committed
445
446
447

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
448
449
450
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
Johann's avatar
Johann committed
451
452
453
454
455
456
457
458
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

clang-format's avatar
clang-format committed
459
/* Defines aom_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c: each filters `src`
 * with bilinear_filters_2t[xoffset] (horizontal pass, H + 1 rows) and
 * bilinear_filters_2t[yoffset] (vertical pass, H rows), then returns the
 * matching aom_highbd_*_variance<W>x<H>_c of the filtered block against `dst`.
 * The filtered block is passed on through CONVERT_TO_BYTEPTR.
 */
#define HIGHBD_SUBPIX_VAR(W, H)                                                \
  uint32_t aom_highbd_8_sub_pixel_variance##W##x##H##_c(                       \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                     \
    uint16_t fdata3[((H) + 1) * (W)];                                          \
    uint16_t temp2[(H) * (W)];                                                 \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, (H) + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,    \
                                              dst, dst_stride, sse);           \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_sub_pixel_variance##W##x##H##_c(                      \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                     \
    uint16_t fdata3[((H) + 1) * (W)];                                          \
    uint16_t temp2[(H) * (W)];                                                 \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, (H) + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,   \
                                               dst, dst_stride, sse);          \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_sub_pixel_variance##W##x##H##_c(                      \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                     \
    uint16_t fdata3[((H) + 1) * (W)];                                          \
    uint16_t temp2[(H) * (W)];                                                 \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, (H) + 1, W, bilinear_filters_2t[xoffset]); \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,   \
                                               dst, dst_stride, sse);          \
  }
Johann's avatar
Johann committed
504

clang-format's avatar
clang-format committed
505
/*
 * HIGHBD_SUBPIX_AVG_VAR(W, H) emits the three functions
 *   aom_highbd_8_sub_pixel_avg_variance<W>x<H>_c
 *   aom_highbd_10_sub_pixel_avg_variance<W>x<H>_c
 *   aom_highbd_12_sub_pixel_avg_variance<W>x<H>_c
 *
 * Each one performs the same sequence and differs only in which
 * aom_highbd_{8,10,12}_variance<W>x<H>_c routine produces the result:
 *   1. aom_highbd_var_filter_block2d_bil_first_pass filters `src` with
 *      bilinear_filters_2t[xoffset] into an (H + 1) * W scratch buffer.
 *   2. aom_highbd_var_filter_block2d_bil_second_pass filters that buffer with
 *      bilinear_filters_2t[yoffset] into an H * W buffer.
 *   3. aom_highbd_comp_avg_pred_c averages the H * W buffer with
 *      `second_pred` into a 16-byte aligned buffer.
 *   4. The averaged buffer (stride W) is compared against `dst`; `sse` is
 *      forwarded to the variance routine and its return value is returned.
 */
#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                           \
  uint32_t aom_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                  \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                      \
      const uint8_t *second_pred) {                                           \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
    DECLARE_ALIGNED(16, uint16_t, blended[H * W]);                            \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    aom_highbd_comp_avg_pred_c(blended, second_pred, W, H,                    \
                               CONVERT_TO_BYTEPTR(pass2), W);                 \
                                                                              \
    return aom_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(blended), W, \
                                              dst, dst_stride, sse);          \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                      \
      const uint8_t *second_pred) {                                           \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
    DECLARE_ALIGNED(16, uint16_t, blended[H * W]);                            \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    aom_highbd_comp_avg_pred_c(blended, second_pred, W, H,                    \
                               CONVERT_TO_BYTEPTR(pass2), W);                 \
                                                                              \
    return aom_highbd_10_variance##W##x##H##_c(                               \
        CONVERT_TO_BYTEPTR(blended), W, dst, dst_stride, sse);                \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                      \
      const uint8_t *second_pred) {                                           \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t pass2[H * W];                                                    \
    DECLARE_ALIGNED(16, uint16_t, blended[H * W]);                            \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, pass2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                              \
    aom_highbd_comp_avg_pred_c(blended, second_pred, W, H,                    \
                               CONVERT_TO_BYTEPTR(pass2), W);                 \
                                                                              \
    return aom_highbd_12_variance##W##x##H##_c(                               \
        CONVERT_TO_BYTEPTR(blended), W, dst, dst_stride, sse);                \
  }
Johann's avatar
Johann committed
565
566
567

/*
 * HIGHBD_VARIANCES(W, H) expands, for a single W x H block size, to
 * HIGHBD_VAR, HIGHBD_SUBPIX_VAR and HIGHBD_SUBPIX_AVG_VAR in that order, so
 * all three forms of the variance exist for exactly the same set of sizes.
 */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)
Johann's avatar
Johann committed
571

Yaowu Xu's avatar
Yaowu Xu committed
572
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
573
574
575
/* Block sizes with a 128-pixel dimension; each line expands HIGHBD_VARIANCES
 * for one size. */
HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
Yaowu Xu's avatar
Yaowu Xu committed
576
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
Johann's avatar
Johann committed
577
578
579
580
581
582
583
584
585
586
587
588
589
590
/* Expand HIGHBD_VARIANCES (HIGHBD_VAR + HIGHBD_SUBPIX_VAR +
 * HIGHBD_SUBPIX_AVG_VAR) once per block size, from 64x64 down to 4x4. */
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)

Johann's avatar
Johann committed
591
592
593
594
595
596
597
598
/* Expand HIGHBD_GET_VAR for sizes 8 and 16.
 * NOTE(review): presumably these are the square NxN get-var helpers; confirm
 * against the HIGHBD_GET_VAR definition. */
HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

/* Expand HIGHBD_MSE for the 16x16, 16x8, 8x16 and 8x8 block sizes. */
HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

Yaowu Xu's avatar
Yaowu Xu committed
599
/*
 * Writes the per-sample average of two 16-bit blocks into comp_pred.
 *
 * comp_pred  - destination, written as `height` rows of `width` samples
 *              stored back to back.
 * pred8      - first input; converted with CONVERT_TO_SHORTPTR and read with
 *              a row pitch of `width`.
 * width      - samples per row.
 * height     - number of rows.
 * ref8       - second input; converted with CONVERT_TO_SHORTPTR.
 * ref_stride - row pitch of the second input, in 16-bit samples.
 *
 * Each output sample is ROUND_POWER_OF_TWO(pred + ref, 1).
 */
void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + ref_row[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    /* Only ref carries its own stride; the other two advance by width. */
    ref_row += ref_stride;
    pred_row += width;
    comp_pred += width;
  }
}
615

Yaowu Xu's avatar
Yaowu Xu committed
616
/*
 * Copies every 8th sample of every 8th row of a 16-bit reference into
 * comp_pred.
 *
 * comp_pred  - destination, `height` rows of `width` samples stored back to
 *              back.
 * width      - samples per output row.
 * height     - number of output rows.
 * ref8       - reference; converted with CONVERT_TO_SHORTPTR.
 * ref_stride - reference row pitch; the source pointer advances by
 *              ref_stride << 3 samples per output row.
 */
void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height,
                                 const uint8_t *ref8, int ref_stride) {
  uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row_step = ref_stride << 3;
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      /* Output column `col` reads reference column col * 8. */
      comp_pred[col] = ref_row[(col << 3)];
    }
    ref_row += row_step;
    comp_pred += width;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
631
/*
 * Averages a 16-bit prediction with every 8th sample of every 8th row of a
 * 16-bit reference and writes the result into comp_pred.
 *
 * comp_pred  - destination, `height` rows of `width` samples stored back to
 *              back.
 * pred8      - prediction; converted with CONVERT_TO_SHORTPTR and read with
 *              a row pitch of `width`.
 * width      - samples per output row.
 * height     - number of output rows.
 * ref8       - reference; converted with CONVERT_TO_SHORTPTR.
 * ref_stride - reference row pitch; the source pointer advances by
 *              ref_stride << 3 samples per output row.
 *
 * Each output sample is ROUND_POWER_OF_TWO(pred[j] + ref[j << 3], 1).
 */
void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
                                          const uint8_t *pred8, int width,
                                          int height, const uint8_t *ref8,
                                          int ref_stride) {
  uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row_step = ref_stride << 3;
  int row, col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + ref_row[(col << 3)];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
    ref_row += row_step;
    pred_row += width;
    comp_pred += width;
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
650
#endif  // CONFIG_AOM_HIGHBITDEPTH
651

Yaowu Xu's avatar
Yaowu Xu committed
652
#if CONFIG_AV1 && CONFIG_EXT_INTER
clang-format's avatar
clang-format committed
653
654
655
/*
 * Accumulates mask-weighted differences between two 8-bit blocks.
 *
 * For every sample of the w x h block the term (a - b) * m is formed; the
 * terms and their squares are summed in 64-bit accumulators.
 *
 * a, a_stride - first block and its row pitch.
 * b, b_stride - second block and its row pitch.
 * m, m_stride - per-sample mask and its row pitch.
 * w, h        - block width and height.
 * sse         - receives ROUND_POWER_OF_TWO(sum of squared terms, 12).
 * sum         - receives ROUND_POWER_OF_TWO(|sum of terms|, 6); the sign of
 *               the sum is discarded before rounding.
 */
void masked_variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, const uint8_t *m, int m_stride, int w, int h,
                     unsigned int *sse, int *sum) {
  int64_t total = 0;
  uint64_t squares = 0;
  int row, col;

  for (row = 0; row < h; row++) {
    for (col = 0; col < w; col++) {
      const int weighted = (a[col] - b[col]) * (m[col]);
      total += weighted;
      squares += weighted * weighted;
    }

    m += m_stride;
    b += b_stride;
    a += a_stride;
  }

  if (total < 0) total = -total;

  *sum = (int)ROUND_POWER_OF_TWO(total, 6);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(squares, 12);
}

clang-format's avatar
clang-format committed
677
/*
 * MASK_VAR(W, H) emits aom_masked_variance<W>x<H>_c.
 *
 * The generated function calls masked_variance() for a W x H block, which
 * fills *sse and a local sum, and returns *sse - sum * sum / (W * H), with
 * the square and the subtraction evaluated in 64-bit arithmetic.
 */
#define MASK_VAR(W, H)                                                       \
  unsigned int aom_masked_variance##W##x##H##_c(                             \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    int64_t mean_sq;                                                         \
                                                                             \
    masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, &sum); \
    mean_sq = ((int64_t)sum * sum) / (W * H);                                \
    return (unsigned int)(*sse - mean_sq);                                   \
  }
685

clang-format's avatar
clang-format committed
686
/*
 * MASK_SUBPIX_VAR(W, H) emits aom_masked_sub_pixel_variance<W>x<H>_c.
 *
 * The generated function runs var_filter_block2d_bil_first_pass on `src`
 * with bilinear_filters_2t[xoffset] into an (H + 1) * W 16-bit buffer, then
 * var_filter_block2d_bil_second_pass with bilinear_filters_2t[yoffset] into
 * an H * W 8-bit buffer, and returns aom_masked_variance<W>x<H>_c of that
 * buffer (stride W) against `dst` under mask `msk`.
 */
#define MASK_SUBPIX_VAR(W, H)                                                 \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint8_t filtered[H * W];                                                  \
                                                                              \
    var_filter_block2d_bil_first_pass(src, pass1, src_stride, 1, H + 1, W,    \
                                      bilinear_filters_2t[xoffset]);          \
    var_filter_block2d_bil_second_pass(pass1, filtered, W, W, H, W,           \
                                       bilinear_filters_2t[yoffset]);         \
                                                                              \
    return aom_masked_variance##W##x##H##_c(filtered, W, dst, dst_stride,     \
                                            msk, msk_stride, sse);            \
  }
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752

/* Expand the masked variance (MASK_VAR) and masked sub-pixel variance
 * (MASK_SUBPIX_VAR) functions for every block size from 4x4 to 64x64.
 * MASK_VAR precedes MASK_SUBPIX_VAR for each size because the sub-pixel
 * function calls the variance function of the same size. */
MASK_VAR(4, 4)
MASK_SUBPIX_VAR(4, 4)

MASK_VAR(4, 8)
MASK_SUBPIX_VAR(4, 8)

MASK_VAR(8, 4)
MASK_SUBPIX_VAR(8, 4)

MASK_VAR(8, 8)
MASK_SUBPIX_VAR(8, 8)

MASK_VAR(8, 16)
MASK_SUBPIX_VAR(8, 16)

MASK_VAR(16, 8)
MASK_SUBPIX_VAR(16, 8)

MASK_VAR(16, 16)
MASK_SUBPIX_VAR(16, 16)

MASK_VAR(16, 32)
MASK_SUBPIX_VAR(16, 32)

MASK_VAR(32, 16)
MASK_SUBPIX_VAR(32, 16)

MASK_VAR(32, 32)
MASK_SUBPIX_VAR(32, 32)

MASK_VAR(32, 64)
MASK_SUBPIX_VAR(32, 64)

MASK_VAR(64, 32)
MASK_SUBPIX_VAR(64, 32)

MASK_VAR(64, 64)
MASK_SUBPIX_VAR(64, 64)

/* Sizes with a 128-pixel dimension are built only with CONFIG_EXT_PARTITION. */
#if CONFIG_EXT_PARTITION
MASK_VAR(64, 128)
MASK_SUBPIX_VAR(64, 128)

MASK_VAR(128, 64)
MASK_SUBPIX_VAR(128, 64)

MASK_VAR(128, 128)
MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

Yaowu Xu's avatar
Yaowu Xu committed
753
#if CONFIG_AOM_HIGHBITDEPTH
clang-format's avatar
clang-format committed
754
755
756
757
/*
 * 64-bit accumulation of mask-weighted differences between two 16-bit
 * blocks.
 *
 * a8 and b8 are converted with CONVERT_TO_SHORTPTR; for every sample of the
 * w x h block the term (a - b) * m is formed and both the terms and their
 * squares are summed.
 *
 * a8, a_stride - first block and its row pitch (in 16-bit samples).
 * b8, b_stride - second block and its row pitch (in 16-bit samples).
 * m, m_stride  - per-sample 8-bit mask and its row pitch.
 * w, h         - block width and height.
 * sse          - receives ROUND_POWER_OF_TWO(sum of squared terms, 12).
 * sum          - receives ROUND_POWER_OF_TWO(|sum of terms|, 6); the sign of
 *                the sum is discarded before rounding.
 */
void highbd_masked_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, const uint8_t *m,
                              int m_stride, int w, int h, uint64_t *sse,
                              int64_t *sum) {
  uint16_t *a_row = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b_row = CONVERT_TO_SHORTPTR(b8);
  int64_t total = 0;
  uint64_t squares = 0;
  int row, col;

  for (row = 0; row < h; row++) {
    for (col = 0; col < w; col++) {
      const int weighted = (a_row[col] - b_row[col]) * (m[col]);
      total += (int64_t)weighted;
      /* Widen before squaring: the square can exceed the range of int. */
      squares += (int64_t)weighted * weighted;
    }

    m += m_stride;
    b_row += b_stride;
    a_row += a_stride;
  }

  if (total < 0) total = -total;

  *sum = ROUND_POWER_OF_TWO(total, 6);
  *sse = ROUND_POWER_OF_TWO(squares, 12);
}

clang-format's avatar
clang-format committed
781
782
783
/*
 * Wrapper around highbd_masked_variance64() that narrows the 64-bit results
 * without any further scaling: *sum receives the sum cast to int and *sse
 * the sum of squares cast to unsigned int.
 */
void highbd_masked_variance(const uint8_t *a8, int a_stride, const uint8_t *b8,
                            int b_stride, const uint8_t *m, int m_stride, int w,
                            int h, unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &wide_sse, &wide_sum);

  *sum = (int)wide_sum;
  *sse = (unsigned int)wide_sse;
}

clang-format's avatar
clang-format committed
792
793
794
/*
 * Wrapper around highbd_masked_variance64() used by the highbd_10 variants.
 * The 64-bit results are scaled down before narrowing: the sum by
 * ROUND_POWER_OF_TWO(…, 2) and the sum of squares by
 * ROUND_POWER_OF_TWO(…, 4).
 */
void highbd_10_masked_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               const uint8_t *m, int m_stride, int w, int h,
                               unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &wide_sse, &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 4);
}

clang-format's avatar
clang-format committed
804
805
806
/*
 * Wrapper around highbd_masked_variance64() used by the highbd_12 variants.
 * The 64-bit results are scaled down before narrowing: the sum by
 * ROUND_POWER_OF_TWO(…, 4) and the sum of squares by
 * ROUND_POWER_OF_TWO(…, 8).
 */
void highbd_12_masked_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride,
                               const uint8_t *m, int m_stride, int w, int h,
                               unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_masked_variance64(a8, a_stride, b8, b_stride, m, m_stride, w, h,
                           &wide_sse, &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 8);
}

clang-format's avatar
clang-format committed
816
/*
 * HIGHBD_MASK_VAR(W, H) emits the three functions
 *   aom_highbd_masked_variance<W>x<H>_c     (uses highbd_masked_variance)
 *   aom_highbd_10_masked_variance<W>x<H>_c  (uses highbd_10_masked_variance)
 *   aom_highbd_12_masked_variance<W>x<H>_c  (uses highbd_12_masked_variance)
 *
 * Each one lets its helper fill *sse and a local sum for a W x H block and
 * returns *sse - sum * sum / (W * H), with the square and the subtraction
 * evaluated in 64-bit arithmetic.
 */
#define HIGHBD_MASK_VAR(W, H)                                                \
  unsigned int aom_highbd_masked_variance##W##x##H##_c(                      \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    int64_t mean_sq;                                                         \
                                                                             \
    highbd_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H, sse, \
                           &sum);                                            \
    mean_sq = ((int64_t)sum * sum) / (W * H);                                \
    return (unsigned int)(*sse - mean_sq);                                   \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_10_masked_variance##W##x##H##_c(                   \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    int64_t mean_sq;                                                         \
                                                                             \
    highbd_10_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,   \
                              sse, &sum);                                    \
    mean_sq = ((int64_t)sum * sum) / (W * H);                                \
    return (unsigned int)(*sse - mean_sq);                                   \
  }                                                                          \
                                                                             \
  unsigned int aom_highbd_12_masked_variance##W##x##H##_c(                   \
      const uint8_t *a, int a_stride, const uint8_t *b, int b_stride,        \
      const uint8_t *m, int m_stride, unsigned int *sse) {                   \
    int sum;                                                                 \
    int64_t mean_sq;                                                         \
                                                                             \
    highbd_12_masked_variance(a, a_stride, b, b_stride, m, m_stride, W, H,   \
                              sse, &sum);                                    \
    mean_sq = ((int64_t)sum * sum) / (W * H);                                \
    return (unsigned int)(*sse - mean_sq);                                   \
  }
843

clang-format's avatar
clang-format committed
844
/*
 * HIGHBD_MASK_SUBPIX_VAR(W, H) emits the three functions
 *   aom_highbd_masked_sub_pixel_variance<W>x<H>_c
 *   aom_highbd_10_masked_sub_pixel_variance<W>x<H>_c
 *   aom_highbd_12_masked_sub_pixel_variance<W>x<H>_c
 *
 * Each one runs aom_highbd_var_filter_block2d_bil_first_pass on `src` with
 * bilinear_filters_2t[xoffset] into an (H + 1) * W buffer, then
 * aom_highbd_var_filter_block2d_bil_second_pass with
 * bilinear_filters_2t[yoffset] into an H * W buffer, and returns the
 * matching aom_highbd[_10|_12]_masked_variance<W>x<H>_c of that buffer
 * (stride W, passed through CONVERT_TO_BYTEPTR) against `dst` under `msk`.
 */
#define HIGHBD_MASK_SUBPIX_VAR(W, H)                                          \
  unsigned int aom_highbd_masked_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t filtered[H * W];                                                 \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, filtered, W, W, H, W, bilinear_filters_2t[yoffset]);           \
                                                                              \
    return aom_highbd_masked_variance##W##x##H##_c(                           \
        CONVERT_TO_BYTEPTR(filtered), W, dst, dst_stride, msk, msk_stride,    \
        sse);                                                                 \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_10_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t filtered[H * W];                                                 \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, filtered, W, W, H, W, bilinear_filters_2t[yoffset]);           \
                                                                              \
    return aom_highbd_10_masked_variance##W##x##H##_c(                        \
        CONVERT_TO_BYTEPTR(filtered), W, dst, dst_stride, msk, msk_stride,    \
        sse);                                                                 \
  }                                                                           \
                                                                              \
  unsigned int aom_highbd_12_masked_sub_pixel_variance##W##x##H##_c(          \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, const uint8_t *msk, int msk_stride, \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint16_t filtered[H * W];                                                 \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, pass1, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        pass1, filtered, W, W, H, W, bilinear_filters_2t[yoffset]);           \
                                                                              \
    return aom_highbd_12_masked_variance##W##x##H##_c(                        \
        CONVERT_TO_BYTEPTR(filtered), W, dst, dst_stride, msk, msk_stride,    \
        sse);                                                                 \
  }
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941

/* Expand the high bit-depth masked variance (HIGHBD_MASK_VAR) and masked
 * sub-pixel variance (HIGHBD_MASK_SUBPIX_VAR) functions for every block size
 * from 4x4 to 64x64. HIGHBD_MASK_VAR precedes HIGHBD_MASK_SUBPIX_VAR for each
 * size because the sub-pixel functions call the variance functions of the
 * same size. */
HIGHBD_MASK_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 4)

HIGHBD_MASK_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(4, 8)

HIGHBD_MASK_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 4)

HIGHBD_MASK_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 8)

HIGHBD_MASK_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(8, 16)

HIGHBD_MASK_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 8)

HIGHBD_MASK_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 16)

HIGHBD_MASK_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(16, 32)

HIGHBD_MASK_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 16)

HIGHBD_MASK_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 32)

HIGHBD_MASK_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(32, 64)

HIGHBD_MASK_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 32)

HIGHBD_MASK_VAR(64, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 64)

/* Sizes with a 128-pixel dimension are built only with CONFIG_EXT_PARTITION. */
#if CONFIG_EXT_PARTITION
HIGHBD_MASK_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(64, 128)

HIGHBD_MASK_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 64)

HIGHBD_MASK_VAR(128, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
Yaowu Xu's avatar
Yaowu Xu committed
942
943
#endif  // CONFIG_AOM_HIGHBITDEPTH
#endif  // CONFIG_AV1 && CONFIG_EXT_INTER
944

Yue Chen's avatar
Yue Chen committed
945
#if CONFIG_AV1 && CONFIG_MOTION_VAR
clang-format's avatar
clang-format committed
946
/*
 * Accumulates the OBMC residual of an 8-bit prediction block.
 *
 * For every sample of the w x h block the term
 *   ROUND_POWER_OF_TWO_SIGNED(wsrc - pre * mask, 12)
 * is formed; *sum receives the sum of the terms and *sse the sum of their
 * squares.
 *
 * pre, pre_stride - prediction block and its row pitch.
 * wsrc, mask      - 32-bit inputs, both read with a row pitch of `w`.
 * w, h            - block width and height.
 */
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  unsigned int squares = 0;
  int total = 0;
  int row, col;

  for (row = 0; row < h; row++) {
    for (col = 0; col < w; col++) {
      int residual =
          ROUND_POWER_OF_TWO_SIGNED(wsrc[col] - pre[col] * mask[col], 12);
      total += residual;
      squares += residual * residual;
    }

    /* wsrc and mask are packed at the block width; only pre has a stride. */
    mask += w;
    wsrc += w;
    pre += pre_stride;
  }

  *sse = squares;
  *sum = total;
}

clang-format's avatar
clang-format committed
967
/*
 * OBMC_VAR(W, H) emits aom_obmc_variance<W>x<H>_c.
 *
 * The generated function calls obmc_variance() for a W x H block, which
 * fills *sse and a local sum, and returns *sse - sum * sum / (W * H), with
 * the square and the subtraction evaluated in 64-bit arithmetic.
 */
#define OBMC_VAR(W, H)                                           \
  unsigned int aom_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,   \
      const int32_t *mask, unsigned int *sse) {                  \
    int sum;                                                     \
    int64_t mean_sq;                                             \
                                                                 \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    mean_sq = ((int64_t)sum * sum) / (W * H);                    \
    return (unsigned int)(*sse - mean_sq);                       \
  }
975

clang-format's avatar
clang-format committed
976
/*
 * OBMC_SUBPIX_VAR(W, H) emits aom_obmc_sub_pixel_variance<W>x<H>_c.
 *
 * The generated function runs var_filter_block2d_bil_first_pass on `pre`
 * with bilinear_filters_2t[xoffset] into an (H + 1) * W 16-bit buffer, then
 * var_filter_block2d_bil_second_pass with bilinear_filters_2t[yoffset] into
 * an H * W 8-bit buffer, and returns aom_obmc_variance<W>x<H>_c of that
 * buffer (stride W) with the caller's wsrc, mask and sse.
 */
#define OBMC_SUBPIX_VAR(W, H)                                               \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,         \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {        \
    uint16_t pass1[(H + 1) * W];                                            \
    uint8_t filtered[H * W];                                                \
                                                                            \
    var_filter_block2d_bil_first_pass(pre, pass1, pre_stride, 1, H + 1, W,  \
                                      bilinear_filters_2t[xoffset]);        \
    var_filter_block2d_bil_second_pass(pass1, filtered, W, W, H, W,         \
                                       bilinear_filters_2t[yoffset]);       \
                                                                            \
    return aom_obmc_variance##W##x##H##_c(filtered, W, wsrc, mask, sse);    \
  }
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040

/* Expand the OBMC variance (OBMC_VAR) and OBMC sub-pixel variance
 * (OBMC_SUBPIX_VAR) functions for every block size from 4x4 to 64x64.
 * OBMC_VAR precedes OBMC_SUBPIX_VAR for each size because the sub-pixel
 * function calls the variance function of the same size. */
OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

/* Sizes with a 128-pixel dimension are built only with CONFIG_EXT_PARTITION. */
#if CONFIG_EXT_PARTITION
OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

Yaowu Xu's avatar
Yaowu Xu committed
1041
#if CONFIG_AOM_HIGHBITDEPTH
1042
1043
/*
 * 64-bit accumulation of the OBMC residual of a 16-bit prediction block.
 *
 * pre8 is converted with CONVERT_TO_SHORTPTR. For every sample of the w x h
 * block the term
 *   ROUND_POWER_OF_TWO_SIGNED(wsrc - pre * mask, 12)
 * is formed; *sum receives the sum of the terms and *sse the sum of their
 * squares, with no further scaling.
 *
 * pre8, pre_stride - prediction block and its row pitch (16-bit samples).
 * wsrc, mask       - 32-bit inputs, both read with a row pitch of `w`.
 * w, h             - block width and height.
 */
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  uint16_t *pre_row = CONVERT_TO_SHORTPTR(pre8);
  uint64_t squares = 0;
  int64_t total = 0;
  int row, col;

  for (row = 0; row < h; row++) {
    for (col = 0; col < w; col++) {
      int residual =
          ROUND_POWER_OF_TWO_SIGNED(wsrc[col] - pre_row[col] * mask[col], 12);
      total += residual;
      squares += residual * residual;
    }

    /* wsrc and mask are packed at the block width; only pre has a stride. */
    mask += w;
    wsrc += w;
    pre_row += pre_stride;
  }

  *sse = squares;
  *sum = total;
}

1065
1066
/*
 * Wrapper around highbd_obmc_variance64() that narrows the 64-bit results
 * without any scaling: *sum receives the sum cast to int and *sse the sum of
 * squares cast to unsigned int.
 */
static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);

  *sum = (int)wide_sum;
  *sse = (unsigned int)wide_sse;
}

clang-format's avatar
clang-format committed
1076
/*
 * Wrapper around highbd_obmc_variance64() used by the highbd_10 variants.
 * The 64-bit results are scaled down before narrowing: the sum by
 * ROUND_POWER_OF_TWO(…, 2) and the sum of squares by
 * ROUND_POWER_OF_TWO(…, 4).
 */
static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 4);
}

clang-format's avatar
clang-format committed
1087
/*
 * Wrapper around highbd_obmc_variance64() used by the highbd_12 variants.
 * The 64-bit results are scaled down before narrowing: the sum by
 * ROUND_POWER_OF_TWO(…, 4) and the sum of squares by
 * ROUND_POWER_OF_TWO(…, 8).
 */
static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  uint64_t wide_sse;
  int64_t wide_sum;

  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &wide_sse,
                         &wide_sum);

  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(wide_sse, 8);
}

clang-format's avatar
clang-format committed
1098
// Defines the three C reference OBMC variance functions for a W x H block:
//   aom_highbd_obmc_variance<W>x<H>_c
//   aom_highbd_10_obmc_variance<W>x<H>_c
//   aom_highbd_12_obmc_variance<W>x<H>_c
// Each stores the sum of squared errors in *sse and returns
// sse - sum * sum / (W * H). W and H must be integer literals because they are
// pasted into the function names.
//
// The 10- and 12-bit helpers round sse and sum independently, so the
// difference can come out slightly negative. Those two variants therefore
// compute it in int64_t and clamp at zero instead of letting a negative value
// wrap around in the unsigned return type.
#define HIGHBD_OBMC_VAR(W, H)                                              \
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    highbd_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum);    \
    return *sse - (((int64_t)sum * sum) / (W * H));                        \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_10_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_10_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    /* sse and sum are rounded independently, so clamp at zero. */         \
    return (var >= 0) ? (unsigned int)var : 0;                             \
  }                                                                        \
                                                                           \
  unsigned int aom_highbd_12_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,             \
      const int32_t *mask, unsigned int *sse) {                            \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_12_obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    /* sse and sum are rounded independently, so clamp at zero. */         \
    return (var >= 0) ? (unsigned int)var : 0;                             \
  }
1122

clang-format's avatar
clang-format committed
1123
// Defines the three C reference OBMC sub-pixel variance functions for a W x H
// block:
//   aom_highbd_obmc_sub_pixel_variance<W>x<H>_c
//   aom_highbd_10_obmc_sub_pixel_variance<W>x<H>_c
//   aom_highbd_12_obmc_sub_pixel_variance<W>x<H>_c
// Each one:
//   1. runs the bilinear first pass over `pre` with the taps selected by
//      xoffset, producing H + 1 rows of W samples in fdata3 (one more row
//      than the block height, which the second pass then consumes);
//   2. runs the bilinear second pass over fdata3 with the taps selected by
//      yoffset, producing the W x H block temp2;
//   3. returns the matching aom_highbd*_obmc_variance<W>x<H>_c() of temp2
//      (stride W) against wsrc/mask, which also fills *sse.
// temp2 holds uint16_t samples and is handed on through CONVERT_TO_BYTEPTR.
// W and H must be integer literals because they are pasted into the function
// names and size the on-stack scratch arrays.
#define HIGHBD_OBMC_SUBPIX_VAR(W, H)                                           \
  unsigned int aom_highbd_obmc_sub_pixel_variance##W##x##H##_c(                \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                                 wsrc, mask, sse);             \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_10_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_10_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                    W, wsrc, mask, sse);       \
  }                                                                            \
                                                                               \
  unsigned int aom_highbd_12_obmc_sub_pixel_variance##W##x##H##_c(             \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,            \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {           \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    return aom_highbd_12_obmc_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                    W, wsrc, mask, sse);       \
  }
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217

// Instantiate the OBMC variance and sub-pixel variance functions for every
// supported block size (W, H). Each HIGHBD_OBMC_VAR / HIGHBD_OBMC_SUBPIX_VAR
// invocation expands to three function definitions (the plain, _10_ and _12_
// named variants) for that size.
HIGHBD_OBMC_VAR(4, 4)
HIGHBD_OBMC_SUBPIX_VAR(4, 4)

HIGHBD_OBMC_VAR(4, 8)
HIGHBD_OBMC_SUBPIX_VAR(4, 8)

HIGHBD_OBMC_VAR(8, 4)
HIGHBD_OBMC_SUBPIX_VAR(8, 4)

HIGHBD_OBMC_VAR(8, 8)
HIGHBD_OBMC_SUBPIX_VAR(8, 8)

HIGHBD_OBMC_VAR(8, 16)
HIGHBD_OBMC_SUBPIX_VAR(8, 16)

HIGHBD_OBMC_VAR(16, 8)
HIGHBD_OBMC_SUBPIX_VAR(16, 8)

HIGHBD_OBMC_VAR(16, 16)
HIGHBD_OBMC_SUBPIX_VAR(16, 16)

HIGHBD_OBMC_VAR(16, 32)
HIGHBD_OBMC_SUBPIX_VAR(16, 32)

HIGHBD_OBMC_VAR(32, 16)
HIGHBD_OBMC_SUBPIX_VAR(32, 16)

HIGHBD_OBMC_VAR(32, 32)
HIGHBD_OBMC_SUBPIX_VAR(32, 32)

HIGHBD_OBMC_VAR(32, 64)
HIGHBD_OBMC_SUBPIX_VAR(32, 64)

HIGHBD_OBMC_VAR(64, 32)
HIGHBD_OBMC_SUBPIX_VAR(64, 32)

HIGHBD_OBMC_VAR(64, 64)
HIGHBD_OBMC_SUBPIX_VAR(64, 64)

// Sizes with a 128-sample dimension are only built when CONFIG_EXT_PARTITION
// is enabled.
#if CONFIG_EXT_PARTITION
HIGHBD_OBMC_VAR(64, 128)
HIGHBD_OBMC_SUBPIX_VAR(64, 128)

HIGHBD_OBMC_VAR(128, 64)
HIGHBD_OBMC_SUBPIX_VAR(128, 64)

HIGHBD_OBMC_VAR(128, 128)
HIGHBD_OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
Yaowu Xu's avatar
Yaowu Xu committed
1218
#endif  // CONFIG_AOM_HIGHBITDEPTH
Yue Chen's avatar
Yue Chen committed
1219
#endif  // CONFIG_AV1 && CONFIG_MOTION_VAR