/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
#include <stdlib.h>
12 13
#include <string.h>
#include <assert.h>
Johann's avatar
Johann committed
14

Yaowu Xu's avatar
Yaowu Xu committed
15 16
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
Johann's avatar
Johann committed
17

18
#include "aom_ports/mem.h"
Yaowu Xu's avatar
Yaowu Xu committed
19
#include "aom/aom_integer.h"
Johann's avatar
Johann committed
20

21
#include "aom_dsp/variance.h"
Yaowu Xu's avatar
Yaowu Xu committed
22
#include "aom_dsp/aom_filter.h"
23
#include "aom_dsp/blend.h"
Johann's avatar
Johann committed
24

25 26 27
#include "./av1_rtcd.h"
#include "av1/common/filter.h"

Yaowu Xu's avatar
Yaowu Xu committed
28
// Returns the sum of squared differences between two 4x4 blocks of 8-bit
// samples.
//
// a, b               - first sample of each block.
// a_stride, b_stride - offset, in samples, from one row to the next.
uint32_t aom_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      // uint8_t operands promote to int, so the difference may be negative.
      int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

Yaowu Xu's avatar
Yaowu Xu committed
46
// Returns the sum of squares of the 256 consecutive int16_t values starting
// at a. The total is accumulated in an unsigned int.
uint32_t aom_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}

Yaowu Xu's avatar
Yaowu Xu committed
56
// Thin wrapper: 16x16 sub-pixel variance with xoffset = 4 and yoffset = 0
// (presumably the horizontal half-pixel position, as the name suggests).
// Stores the SSE in *sse and returns the variance.
uint32_t aom_variance_halfpixvar16x16_h_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 0, b, b_stride, sse);
}

Yaowu Xu's avatar
Yaowu Xu committed
62
// Thin wrapper: 16x16 sub-pixel variance with xoffset = 0 and yoffset = 4
// (presumably the vertical half-pixel position, as the name suggests).
// Stores the SSE in *sse and returns the variance.
uint32_t aom_variance_halfpixvar16x16_v_c(const uint8_t *a, int a_stride,
                                          const uint8_t *b, int b_stride,
                                          uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 0, 4, b, b_stride, sse);
}

Yaowu Xu's avatar
Yaowu Xu committed
68
// Thin wrapper: 16x16 sub-pixel variance with xoffset = 4 and yoffset = 4
// (presumably the half-pixel position in both directions, as the name
// suggests). Stores the SSE in *sse and returns the variance.
uint32_t aom_variance_halfpixvar16x16_hv_c(const uint8_t *a, int a_stride,
                                           const uint8_t *b, int b_stride,
                                           uint32_t *sse) {
  return aom_sub_pixel_variance16x16_c(a, a_stride, 4, 4, b, b_stride, sse);
}

clang-format's avatar
clang-format committed
74 75
// Accumulates difference statistics over a w x h block pair of 8-bit samples.
//
// a, b               - first sample of each block.
// a_stride, b_stride - offset, in samples, from one row to the next.
// sse                - receives the sum of squared differences (a - b)^2.
// sum                - receives the signed sum of differences (a - b).
//
// Both outputs are reset to zero before accumulation starts.
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}

93 94 95 96 97 98 99 100
// Returns the sum of squared differences of a w x h block pair. The sum of
// differences that variance() also produces is computed and discarded.
uint32_t aom_sse_odd_size(const uint8_t *a, int a_stride, const uint8_t *b,
                          int b_stride, int w, int h) {
  int unused_sum;
  uint32_t block_sse;

  variance(a, a_stride, b, b_stride, w, h, &block_sse, &unused_sum);
  return block_sse;
}

Johann's avatar
Johann committed
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// the first-pass of 2-D separable filter.
//
// Produces int16_t output to retain precision for the next pass. Two filter
// taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
// applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
// It defines the offset required to move from one input to the next.
static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
119 120
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
Johann's avatar
Johann committed
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144

      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
// the second-pass of 2-D separable filter.
//
// Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one input
// to the next. Output is 8-bit.
static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
                                               unsigned int src_pixels_per_line,
                                               unsigned int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
                                               const uint8_t *filter) {
clang-format's avatar
clang-format committed
145
  unsigned int i, j;
Johann's avatar
Johann committed
146 147 148

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
149 150
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
Johann's avatar
Johann committed
151 152 153 154 155 156 157 158
      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

clang-format's avatar
clang-format committed
159
// Defines aom_variance<W>x<H>_c(): stores the sum of squared differences of
// the WxH block pair in *sse and returns sse - sum^2 / (W * H). The squared
// sum is computed in 64 bits before the division.
#define VAR(W, H)                                                    \
  uint32_t aom_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
  }
Johann's avatar
Johann committed
167

clang-format's avatar
clang-format committed
168
// Defines aom_sub_pixel_variance<W>x<H>_c(): filters block a with the 2-tap
// bilinear kernels selected by xoffset and yoffset, then returns the variance
// of the filtered block against b.
//
// The first pass runs with pixel_step = 1 and produces H + 1 rows, because the
// second pass (pixel_step = W) reads one row below each output row.
#define SUBPIX_VAR(W, H)                                                \
  uint32_t aom_sub_pixel_variance##W##x##H##_c(                         \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    return aom_variance##W##x##H##_c(temp2, W, b, b_stride, sse);       \
  }
Johann's avatar
Johann committed
182

clang-format's avatar
clang-format committed
183
// Defines aom_sub_pixel_avg_variance<W>x<H>_c(): same bilinear filtering as
// SUBPIX_VAR, but the filtered block is first combined with second_pred via
// aom_comp_avg_pred() and the variance of that combined block against b is
// returned.
#define SUBPIX_AVG_VAR(W, H)                                            \
  uint32_t aom_sub_pixel_avg_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                     \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                         \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);    \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters_2t[yoffset]);   \
                                                                        \
    aom_comp_avg_pred(temp3, second_pred, W, H, temp2, W);              \
                                                                        \
    return aom_variance##W##x##H##_c(temp3, W, b, b_stride, sse);       \
  }
Johann's avatar
Johann committed
201

Johann's avatar
Johann committed
202 203 204 205
/* Identical to the variance call except it takes an additional parameter, sum,
 * and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h).
 */
#define GET_VAR(W, H)                                                         \
  void aom_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }
Johann's avatar
Johann committed
212 213 214 215 216

/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / (w * h) and returns sse in addition to modifying the passed in
 * variable.
 */
#define MSE(W, H)                                               \
  uint32_t aom_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }
Johann's avatar
Johann committed
225

Johann's avatar
Johann committed
226 227
/* All three forms of the variance are available in the same sizes:
 * VAR, SUBPIX_VAR and SUBPIX_AVG_VAR are instantiated together for W x H.
 */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)
Johann's avatar
Johann committed
231

Yaowu Xu's avatar
Yaowu Xu committed
232
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
233 234 235
VARIANCES(128, 128)
VARIANCES(128, 64)
VARIANCES(64, 128)
Yaowu Xu's avatar
Yaowu Xu committed
236
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
Johann's avatar
Johann committed
237 238 239 240 241 242 243 244 245 246 247 248 249
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)
250 251
VARIANCES(4, 2)
VARIANCES(2, 4)
Jingning Han's avatar
Jingning Han committed
252
VARIANCES(2, 2)
Johann's avatar
Johann committed
253

254 255 256 257 258 259 260
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
VARIANCES(4, 16)
VARIANCES(16, 4)
VARIANCES(8, 32)
VARIANCES(32, 8)
#endif

Johann's avatar
Johann committed
261 262 263 264 265 266 267 268
GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

Yaowu Xu's avatar
Yaowu Xu committed
269
// Writes the per-sample rounded average of pred and ref into comp_pred.
//
// comp_pred and pred are both stepped by `width` samples per row; ref is
// stepped by ref_stride samples per row.
void aom_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}

284
// Get pred block from up-sampled reference.
Yaowu Xu's avatar
Yaowu Xu committed
285
void aom_upsampled_pred_c(uint8_t *comp_pred, int width, int height,
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
                          int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
                          int ref_stride) {
  if (!subpel_x_q3 && !subpel_y_q3) {
    int i;
    for (i = 0; i < height; i++) {
      memcpy(comp_pred, ref, width * sizeof(*comp_pred));
      comp_pred += width;
      ref += ref_stride;
    }
  } else {
    InterpFilterParams filter;
    filter = av1_get_interp_filter_params(EIGHTTAP_REGULAR);
    if (!subpel_y_q3) {
      const int16_t *kernel;
      kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
      /*Directly call C version to allow this to work for small (2x2) sizes.*/
      aom_convolve8_horiz_c(ref, ref_stride, comp_pred, width, kernel, 16, NULL,
                            -1, width, height);
    } else if (!subpel_x_q3) {
      const int16_t *kernel;
      kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
      /*Directly call C version to allow this to work for small (2x2) sizes.*/
      aom_convolve8_vert_c(ref, ref_stride, comp_pred, width, NULL, -1, kernel,
                           16, width, height);
    } else {
      DECLARE_ALIGNED(16, uint8_t,
                      temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
      const int16_t *kernel_x;
      const int16_t *kernel_y;
      int intermediate_height;
      kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
      kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
      intermediate_height =
          (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
      assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
      /*Directly call C versions to allow this to work for small (2x2) sizes.*/
      aom_convolve8_horiz_c(ref - ref_stride * ((filter.taps >> 1) - 1),
                            ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL,
                            -1, width, intermediate_height);
      aom_convolve8_vert_c(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1),
                           MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y,
                           16, width, height);
328
    }
clang-format's avatar
clang-format committed
329
  }
330 331
}

Yaowu Xu's avatar
Yaowu Xu committed
332
// Builds the up-sampled prediction from ref into comp_pred (via
// aom_upsampled_pred), then replaces each sample of comp_pred with its rounded
// average with the corresponding sample of pred. Both comp_pred and pred are
// stepped by `width` samples per row.
void aom_comp_avg_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                   int width, int height, int subpel_x_q3,
                                   int subpel_y_q3, const uint8_t *ref,
                                   int ref_stride) {
  int i, j;

  aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
                     ref_stride);
  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      comp_pred[j] = ROUND_POWER_OF_TWO(comp_pred[j] + pred[j], 1);
    }
    comp_pred += width;
    pred += width;
  }
}

349
#if CONFIG_HIGHBITDEPTH
clang-format's avatar
clang-format committed
350 351 352
// Accumulates difference statistics over a w x h block pair of 16-bit samples
// using 64-bit accumulators.
//
// a8, b8             - block pointers; converted to uint16_t pointers with
//                      CONVERT_TO_SHORTPTR before use.
// a_stride, b_stride - offset, in 16-bit samples, from one row to the next.
// sse                - receives the sum of squared differences.
// sum                - receives the signed sum of differences.
//
// Both outputs are reset to zero before accumulation starts.
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      // Square in 64 bits: for arbitrary 16-bit input diff can reach
      // +/-65535, and 65535^2 does not fit in a 32-bit int.
      *sse += (uint64_t)((int64_t)diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
}

371 372 373 374 375 376 377 378
// Returns the 64-bit sum of squared differences of a w x h high-bitdepth block
// pair. The sum of differences that highbd_variance64() also produces is
// computed and discarded.
uint64_t aom_highbd_sse_odd_size(const uint8_t *a, int a_stride,
                                 const uint8_t *b, int b_stride, int w, int h) {
  int64_t unused_sum;
  uint64_t block_sse;

  highbd_variance64(a, a_stride, b, b_stride, w, h, &block_sse, &unused_sum);
  return block_sse;
}

clang-format's avatar
clang-format committed
379 380 381
// Computes the block statistics with highbd_variance64() and narrows the
// 64-bit results to the 32-bit outputs without any scaling.
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

clang-format's avatar
clang-format committed
389 390 391
// Computes the block statistics with highbd_variance64(), then scales them
// down with rounding before narrowing: sse by 4 bits and sum by 2 bits
// (presumably to normalize 10-bit statistics to an 8-bit-equivalent range).
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

clang-format's avatar
clang-format committed
399 400 401
// Computes the block statistics with highbd_variance64(), then scales them
// down with rounding before narrowing: sse by 8 bits and sum by 4 bits
// (presumably to normalize 12-bit statistics to an 8-bit-equivalent range).
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}

clang-format's avatar
clang-format committed
409
// Defines aom_highbd_{8,10,12}_variance<W>x<H>_c(): each stores the (scaled)
// SSE in *sse and returns sse - sum^2 / (W * H). The 10- and 12-bit variants
// compute the result in 64 bits and clamp a negative value to 0; the 8-bit
// variant performs the subtraction in uint32_t without clamping.
#define HIGHBD_VAR(W, H)                                                       \
  uint32_t aom_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
                                              const uint8_t *b, int b_stride,  \
                                              uint32_t *sse) {                 \
    int sum;                                                                   \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);              \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                  \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }                                                                            \
                                                                               \
  uint32_t aom_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }
Johann's avatar
Johann committed
437

clang-format's avatar
clang-format committed
438
// Defines aom_highbd_{8,10,12}_get<S>x<S>var_c(): each forwards to the
// matching highbd_*_variance() helper for an S x S block and reports both the
// SSE and the sum of differences through the output pointers.
#define HIGHBD_GET_VAR(S)                                                    \
  void aom_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void aom_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void aom_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }
Johann's avatar
Johann committed
456

clang-format's avatar
clang-format committed
457
// Defines aom_highbd_{8,10,12}_mse<W>x<H>_c(): each computes the block
// statistics with the matching highbd_*_variance() helper, stores the SSE in
// *sse and also returns it; the sum of differences is discarded.
#define HIGHBD_MSE(W, H)                                                      \
  uint32_t aom_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t aom_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }
Johann's avatar
Johann committed
481

Yaowu Xu's avatar
Yaowu Xu committed
482
void aom_highbd_var_filter_block2d_bil_first_pass(
clang-format's avatar
clang-format committed
483 484 485
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
Johann's avatar
Johann committed
486 487 488 489 490
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
491 492 493
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
Johann's avatar
Johann committed
494 495 496 497 498 499 500 501 502 503

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
504
void aom_highbd_var_filter_block2d_bil_second_pass(
clang-format's avatar
clang-format committed
505 506 507
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
Johann's avatar
Johann committed
508
    const uint8_t *filter) {
clang-format's avatar
clang-format committed
509
  unsigned int i, j;
Johann's avatar
Johann committed
510 511 512

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
clang-format's avatar
clang-format committed
513 514 515
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
Johann's avatar
Johann committed
516 517 518 519 520 521 522 523
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

clang-format's avatar
clang-format committed
524
/* Defines aom_highbd_<BD>_sub_pixel_variance<W>x<H>_c for one bit depth BD.
 *
 * The generated function runs the first bilinear pass over (H + 1) rows of W
 * samples of src using bilinear_filters_2t[xoffset] with a pixel step of 1,
 * then the second pass over that intermediate (row length W, pixel step W)
 * using bilinear_filters_2t[yoffset] to obtain an H x W block. It returns
 * aom_highbd_<BD>_variance<W>x<H>_c of that block against dst, forwarding
 * sse to that call.
 */
#define HIGHBD_SUBPIX_VAR_BD(BD, W, H)                                        \
  uint32_t aom_highbd_##BD##_sub_pixel_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                    \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint16_t temp2[H * W];                                                    \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                              \
    return aom_highbd_##BD##_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                                   W, dst, dst_stride, sse);  \
  }

/* Emits the 8-, 10- and 12-bit sub-pixel variance functions for W x H. */
#define HIGHBD_SUBPIX_VAR(W, H)  \
  HIGHBD_SUBPIX_VAR_BD(8, W, H)  \
  HIGHBD_SUBPIX_VAR_BD(10, W, H) \
  HIGHBD_SUBPIX_VAR_BD(12, W, H)
Johann's avatar
Johann committed
569

clang-format's avatar
clang-format committed
570
/* Defines aom_highbd_<BD>_sub_pixel_avg_variance<W>x<H>_c for one bit depth.
 *
 * The generated function runs the first bilinear pass over (H + 1) rows of W
 * samples of src using bilinear_filters_2t[xoffset] with a pixel step of 1,
 * then the second pass over that intermediate (row length W, pixel step W)
 * using bilinear_filters_2t[yoffset]. The H x W result is combined with
 * second_pred by aom_highbd_comp_avg_pred_c, and the function returns
 * aom_highbd_<BD>_variance<W>x<H>_c of the combined block against dst,
 * forwarding sse to that call.
 */
#define HIGHBD_SUBPIX_AVG_VAR_BD(BD, W, H)                                    \
  uint32_t aom_highbd_##BD##_sub_pixel_avg_variance##W##x##H##_c(             \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                      \
      const uint8_t *second_pred) {                                           \
    uint16_t fdata3[(H + 1) * W];                                             \
    uint16_t temp2[H * W];                                                    \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                              \
                                                                              \
    aom_highbd_var_filter_block2d_bil_first_pass(                             \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);  \
    aom_highbd_var_filter_block2d_bil_second_pass(                            \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);             \
                                                                              \
    aom_highbd_comp_avg_pred_c(temp3, second_pred, W, H,                      \
                               CONVERT_TO_BYTEPTR(temp2), W);                 \
                                                                              \
    return aom_highbd_##BD##_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
                                                   W, dst, dst_stride, sse);  \
  }

/* Emits the 8-, 10- and 12-bit sub-pixel averaged variance functions for
 * W x H. */
#define HIGHBD_SUBPIX_AVG_VAR(W, H)  \
  HIGHBD_SUBPIX_AVG_VAR_BD(8, W, H)  \
  HIGHBD_SUBPIX_AVG_VAR_BD(10, W, H) \
  HIGHBD_SUBPIX_AVG_VAR_BD(12, W, H)
Johann's avatar
Johann committed
630 631 632

/* Expands to every high-bitdepth variance flavour for one W x H block size:
 * HIGHBD_VAR, HIGHBD_SUBPIX_VAR and HIGHBD_SUBPIX_AVG_VAR, in that order. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)
Johann's avatar
Johann committed
636

Yaowu Xu's avatar
Yaowu Xu committed
637
#if CONFIG_AV1 && CONFIG_EXT_PARTITION
638 639 640
/* Block sizes with a 128-sample dimension; only emitted under the enclosing
 * CONFIG_AV1 && CONFIG_EXT_PARTITION guard. */
HIGHBD_VARIANCES(128, 128)
HIGHBD_VARIANCES(128, 64)
HIGHBD_VARIANCES(64, 128)
Yaowu Xu's avatar
Yaowu Xu committed
641
#endif  // CONFIG_AV1 && CONFIG_EXT_PARTITION
Johann's avatar
Johann committed
642 643 644 645 646 647 648 649 650 651 652 653 654
/* Each line expands (via HIGHBD_VARIANCES) to the HIGHBD_VAR,
 * HIGHBD_SUBPIX_VAR and HIGHBD_SUBPIX_AVG_VAR definitions for one W x H
 * block size, from 64x64 down to 4x4. */
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)
655 656 657
/* Block sizes with a 2-sample dimension. */
HIGHBD_VARIANCES(4, 2)
HIGHBD_VARIANCES(2, 4)
HIGHBD_VARIANCES(2, 2)
Johann's avatar
Johann committed
658

659 660 661 662 663 664 665
/* 4:1 and 1:4 block sizes, emitted only when CONFIG_EXT_PARTITION_TYPES is
 * enabled. */
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
HIGHBD_VARIANCES(4, 16)
HIGHBD_VARIANCES(16, 4)
HIGHBD_VARIANCES(8, 32)
HIGHBD_VARIANCES(32, 8)
#endif

Johann's avatar
Johann committed
666 667 668 669 670 671 672 673
/* Instantiations of HIGHBD_GET_VAR for sizes 8 and 16, and of HIGHBD_MSE for
 * the four block sizes built from 8 and 16.
 * NOTE(review): both macros are defined earlier in this file; presumably they
 * emit the high-bitdepth get-variance and MSE functions -- confirm there. */
HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

Yaowu Xu's avatar
Yaowu Xu committed
674
/* Averages two high-bitdepth blocks sample by sample.
 *
 * pred8 and ref8 are converted with CONVERT_TO_SHORTPTR. Each output sample
 * is ROUND_POWER_OF_TWO(pred + ref, 1). comp_pred and pred are walked with a
 * row stride of width; ref is walked with ref_stride.
 */
void aom_highbd_comp_avg_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                int width, int height, const uint8_t *ref8,
                                int ref_stride) {
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row;

  for (row = 0; row < height; ++row) {
    int col;
    for (col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + ref_row[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
    comp_pred += width;
    pred_row += width;
    ref_row += ref_stride;
  }
}
690

Yaowu Xu's avatar
Yaowu Xu committed
691
void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height,
692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720
                                 int subpel_x_q3, int subpel_y_q3,
                                 const uint8_t *ref8, int ref_stride, int bd) {
  if (!subpel_x_q3 && !subpel_y_q3) {
    const uint16_t *ref;
    int i;
    ref = CONVERT_TO_SHORTPTR(ref8);
    for (i = 0; i < height; i++) {
      memcpy(comp_pred, ref, width * sizeof(*comp_pred));
      comp_pred += width;
      ref += ref_stride;
    }
  } else {
    InterpFilterParams filter;
    filter = av1_get_interp_filter_params(EIGHTTAP_REGULAR);
    if (!subpel_y_q3) {
      const int16_t *kernel;
      kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
      /*Directly call C version to allow this to work for small (2x2) sizes.*/
      aom_highbd_convolve8_horiz_c(ref8, ref_stride,
                                   CONVERT_TO_BYTEPTR(comp_pred), width, kernel,
                                   16, NULL, -1, width, height, bd);
    } else if (!subpel_x_q3) {
      const int16_t *kernel;
      kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
      /*Directly call C version to allow this to work for small (2x2) sizes.*/
      aom_highbd_convolve8_vert_c(ref8, ref_stride,
                                  CONVERT_TO_BYTEPTR(comp_pred), width, NULL,
                                  -1, kernel, 16, width, height, bd);
    } else {
721 722
      DECLARE_ALIGNED(16, uint16_t,
                      temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]);
723 724 725 726 727 728 729 730 731
      const int16_t *kernel_x;
      const int16_t *kernel_y;
      int intermediate_height;
      kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
      kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
      intermediate_height =
          (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
      assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
      /*Directly call C versions to allow this to work for small (2x2) sizes.*/
732 733 734 735
      aom_highbd_convolve8_horiz_c(ref8 - ref_stride * ((filter.taps >> 1) - 1),
                                   ref_stride, CONVERT_TO_BYTEPTR(temp),
                                   MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
                                   intermediate_height, bd);
736 737 738 739
      aom_highbd_convolve8_vert_c(
          CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
          MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
          16, width, height, bd);
740 741 742 743
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
744
/* Fills comp_pred via aom_highbd_upsampled_pred() and then averages it in
 * place with pred8.
 *
 * pred8 is converted with CONVERT_TO_SHORTPTR. After the upsampling call,
 * each sample becomes ROUND_POWER_OF_TWO(pred + comp_pred, 1). Both buffers
 * are walked with a row stride of width.
 */
void aom_highbd_comp_avg_upsampled_pred_c(uint16_t *comp_pred,
                                          const uint8_t *pred8, int width,
                                          int height, int subpel_x_q3,
                                          int subpel_y_q3, const uint8_t *ref8,
                                          int ref_stride, int bd) {
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  int row;

  aom_highbd_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3,
                            ref8, ref_stride, bd);

  for (row = 0; row < height; ++row) {
    int col;
    for (col = 0; col < width; ++col) {
      const int pair_sum = pred_row[col] + comp_pred[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }
    comp_pred += width;
    pred_row += width;
  }
}
762
#endif  // CONFIG_HIGHBITDEPTH
763

Yaowu Xu's avatar
Yaowu Xu committed
764
#if CONFIG_AV1 && CONFIG_EXT_INTER
765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785
/* Blends pred and ref into comp_pred using one mask value per pixel.
 *
 * With invert_mask == 0 each pixel is AOM_BLEND_A64(mask, ref, pred);
 * otherwise the two sources are swapped. comp_pred and pred are walked with
 * a row stride of width, ref with ref_stride and mask with mask_stride.
 */
void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                          int height, const uint8_t *ref, int ref_stride,
                          const uint8_t *mask, int mask_stride,
                          int invert_mask) {
  int row;

  for (row = 0; row < height; ++row) {
    int col;
    for (col = 0; col < width; ++col) {
      const uint8_t m = mask[col];
      const uint8_t from_ref = ref[col];
      const uint8_t from_pred = pred[col];
      comp_pred[col] = invert_mask ? AOM_BLEND_A64(m, from_pred, from_ref)
                                   : AOM_BLEND_A64(m, from_ref, from_pred);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
    mask += mask_stride;
  }
}

/* Fills comp_pred via aom_upsampled_pred() and then blends it in place with
 * pred using one mask value per pixel.
 *
 * With invert_mask == 0 each pixel is AOM_BLEND_A64(mask, comp_pred, pred);
 * otherwise the two sources are swapped. comp_pred and pred are walked with
 * a row stride of width, mask with mask_stride.
 */
void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
                                    int width, int height, int subpel_x_q3,
                                    int subpel_y_q3, const uint8_t *ref,
                                    int ref_stride, const uint8_t *mask,
                                    int mask_stride, int invert_mask) {
  int row;

  aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
                     ref_stride);

  for (row = 0; row < height; row++) {
    int col;
    for (col = 0; col < width; col++) {
      const uint8_t m = mask[col];
      const uint8_t upsampled = comp_pred[col];
      const uint8_t from_pred = pred[col];
      comp_pred[col] = invert_mask ? AOM_BLEND_A64(m, from_pred, upsampled)
                                   : AOM_BLEND_A64(m, upsampled, from_pred);
    }
    comp_pred += width;
    pred += width;
    mask += mask_stride;
  }
}

David Barker's avatar
David Barker committed
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824
/* Defines aom_masked_sub_pixel_variance<W>x<H>_c.
 *
 * The generated function runs the two bilinear passes over src (kernels
 * bilinear_filters_2t[xoffset] and bilinear_filters_2t[yoffset]), combines
 * the H x W result with second_pred through aom_comp_mask_pred_c using
 * msk / msk_stride / invert_mask, and returns aom_variance<W>x<H>_c of the
 * combined block against ref, forwarding sse to that call.
 */
#define MASK_SUBPIX_VAR(W, H)                                                 \
  unsigned int aom_masked_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,           \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,         \
      const uint8_t *msk, int msk_stride, int invert_mask,                    \
      unsigned int *sse) {                                                    \
    uint16_t pass1[(H + 1) * W];                                              \
    uint8_t pass2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint8_t, blended[H * W]);                             \
    unsigned int variance;                                                    \
                                                                              \
    var_filter_block2d_bil_first_pass(src, pass1, src_stride, 1, H + 1, W,    \
                                      bilinear_filters_2t[xoffset]);          \
    var_filter_block2d_bil_second_pass(pass1, pass2, W, W, H, W,              \
                                       bilinear_filters_2t[yoffset]);         \
                                                                              \
    aom_comp_mask_pred_c(blended, second_pred, W, H, pass2, W, msk,           \
                         msk_stride, invert_mask);                            \
    variance = aom_variance##W##x##H##_c(blended, W, ref, ref_stride, sse);   \
    return variance;                                                          \
  }
826

827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845
/* One aom_masked_sub_pixel_variance<W>x<H>_c definition per block size. The
 * sizes with a 128-sample dimension are emitted only when
 * CONFIG_EXT_PARTITION is enabled. */
MASK_SUBPIX_VAR(4, 4)
MASK_SUBPIX_VAR(4, 8)
MASK_SUBPIX_VAR(8, 4)
MASK_SUBPIX_VAR(8, 8)
MASK_SUBPIX_VAR(8, 16)
MASK_SUBPIX_VAR(16, 8)
MASK_SUBPIX_VAR(16, 16)
MASK_SUBPIX_VAR(16, 32)
MASK_SUBPIX_VAR(32, 16)
MASK_SUBPIX_VAR(32, 32)
MASK_SUBPIX_VAR(32, 64)
MASK_SUBPIX_VAR(64, 32)
MASK_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
MASK_SUBPIX_VAR(64, 128)
MASK_SUBPIX_VAR(128, 64)
MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

846 847 848 849 850 851 852
/* 4:1 and 1:4 block sizes, emitted only when CONFIG_EXT_PARTITION_TYPES is
 * enabled. */
#if CONFIG_EXT_PARTITION_TYPES
MASK_SUBPIX_VAR(4, 16)
MASK_SUBPIX_VAR(16, 4)
MASK_SUBPIX_VAR(8, 32)
MASK_SUBPIX_VAR(32, 8)
#endif

853
#if CONFIG_HIGHBITDEPTH
854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
/* High-bitdepth masked blend of pred8 and ref8 into comp_pred.
 *
 * pred8 and ref8 are converted with CONVERT_TO_SHORTPTR. With
 * invert_mask == 0 each sample is AOM_BLEND_A64(mask, ref, pred); otherwise
 * the two sources are swapped. comp_pred and pred are walked with a row
 * stride of width, ref with ref_stride and mask with mask_stride.
 */
void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
                                 int width, int height, const uint8_t *ref8,
                                 int ref_stride, const uint8_t *mask,
                                 int mask_stride, int invert_mask) {
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  const uint16_t *ref_row = CONVERT_TO_SHORTPTR(ref8);
  int row;

  for (row = 0; row < height; ++row) {
    int col;
    for (col = 0; col < width; ++col) {
      const uint8_t m = mask[col];
      const uint16_t from_ref = ref_row[col];
      const uint16_t from_pred = pred_row[col];
      comp_pred[col] = invert_mask ? AOM_BLEND_A64(m, from_pred, from_ref)
                                   : AOM_BLEND_A64(m, from_ref, from_pred);
    }
    comp_pred += width;
    pred_row += width;
    ref_row += ref_stride;
    mask += mask_stride;
  }
}

875 876 877 878
/* Fills comp_pred via aom_highbd_upsampled_pred() and then blends it in place
 * with pred8 using one mask value per sample.
 *
 * pred8 is converted with CONVERT_TO_SHORTPTR. With invert_mask == 0 each
 * sample is AOM_BLEND_A64(mask, comp_pred, pred); otherwise the two sources
 * are swapped. comp_pred and pred are walked with a row stride of width,
 * mask with mask_stride.
 */
void aom_highbd_comp_mask_upsampled_pred_c(
    uint16_t *comp_pred, const uint8_t *pred8, int width, int height,
    int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride,
    const uint8_t *mask, int mask_stride, int invert_mask, int bd) {
  const uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  int row;

  aom_highbd_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3,
                            ref8, ref_stride, bd);

  for (row = 0; row < height; ++row) {
    int col;
    for (col = 0; col < width; ++col) {
      const uint8_t m = mask[col];
      const uint16_t upsampled = comp_pred[col];
      const uint16_t from_pred = pred_row[col];
      comp_pred[col] = invert_mask ? AOM_BLEND_A64(m, from_pred, upsampled)
                                   : AOM_BLEND_A64(m, upsampled, from_pred);
    }
    comp_pred += width;
    pred_row += width;
    mask += mask_stride;
  }
}

/* Defines aom_highbd_<BD>_masked_sub_pixel_variance<W>x<H>_c for one bit
 * depth BD.
 *
 * The generated function runs the first bilinear pass over (H + 1) rows of W
 * samples of src using bilinear_filters_2t[xoffset] with a pixel step of 1,
 * then the second pass over that intermediate (row length W, pixel step W)
 * using bilinear_filters_2t[yoffset]. The H x W result is combined with
 * second_pred by aom_highbd_comp_mask_pred_c using msk / msk_stride /
 * invert_mask, and the function returns aom_highbd_<BD>_variance<W>x<H>_c of
 * the combined block against ref, forwarding sse to that call.
 */
#define HIGHBD_MASK_SUBPIX_VAR_BD(BD, W, H)                                    \
  unsigned int aom_highbd_##BD##_masked_sub_pixel_variance##W##x##H##_c(       \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,            \
      const uint8_t *ref, int ref_stride, const uint8_t *second_pred,          \
      const uint8_t *msk, int msk_stride, int invert_mask,                     \
      unsigned int *sse) {                                                     \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
                                                                               \
    aom_highbd_var_filter_block2d_bil_first_pass(                              \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]);   \
    aom_highbd_var_filter_block2d_bil_second_pass(                             \
        fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]);              \
                                                                               \
    aom_highbd_comp_mask_pred_c(temp3, second_pred, W, H,                      \
                                CONVERT_TO_BYTEPTR(temp2), W, msk, msk_stride, \
                                invert_mask);                                  \
                                                                               \
    return aom_highbd_##BD##_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3),  \
                                                   W, ref, ref_stride, sse);   \
  }

/* Emits the 8-, 10- and 12-bit masked sub-pixel variance functions for
 * W x H. */
#define HIGHBD_MASK_SUBPIX_VAR(W, H)  \
  HIGHBD_MASK_SUBPIX_VAR_BD(8, W, H)  \
  HIGHBD_MASK_SUBPIX_VAR_BD(10, W, H) \
  HIGHBD_MASK_SUBPIX_VAR_BD(12, W, H)
963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981

/* Each line emits the 8-, 10- and 12-bit masked sub-pixel variance functions
 * for one block size. The sizes with a 128-sample dimension are emitted only
 * when CONFIG_EXT_PARTITION is enabled. */
HIGHBD_MASK_SUBPIX_VAR(4, 4)
HIGHBD_MASK_SUBPIX_VAR(4, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 8)
HIGHBD_MASK_SUBPIX_VAR(8, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 16)
HIGHBD_MASK_SUBPIX_VAR(32, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 32)
HIGHBD_MASK_SUBPIX_VAR(64, 64)
#if CONFIG_EXT_PARTITION
HIGHBD_MASK_SUBPIX_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION
982 983 984 985 986 987 988

/* 4:1 and 1:4 block sizes, emitted only when CONFIG_EXT_PARTITION_TYPES is
 * enabled. */
#if CONFIG_EXT_PARTITION_TYPES
HIGHBD_MASK_SUBPIX_VAR(4, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 8)
#endif
989
#endif  // CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
990
#endif  // CONFIG_AV1 && CONFIG_EXT_INTER
991

Yue Chen's avatar
Yue Chen committed
992
#if CONFIG_AV1 && CONFIG_MOTION_VAR
clang-format's avatar
clang-format committed
993
/* Accumulates the sum and the sum of squares of the per-pixel OBMC
 * differences over a w x h block.
 *
 * Each difference is
 *   ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12).
 * pre is walked with pre_stride; wsrc and mask are walked with a row stride
 * of w. *sum receives the sum of the differences and *sse the sum of their
 * squares.
 */
static INLINE void obmc_variance(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int w, int h, unsigned int *sse, int *sum) {
  unsigned int sq_total = 0;
  int diff_total = 0;
  int row;

  for (row = 0; row < h; row++) {
    int col;
    for (col = 0; col < w; col++) {
      const int diff =
          ROUND_POWER_OF_TWO_SIGNED(wsrc[col] - pre[col] * mask[col], 12);
      diff_total += diff;
      sq_total += diff * diff;
    }

    pre += pre_stride;
    wsrc += w;
    mask += w;
  }

  *sse = sq_total;
  *sum = diff_total;
}

1014 1015 1016 1017 1018 1019 1020
/* Defines aom_obmc_variance<W>x<H>_c: runs obmc_variance over a W x H block
 * and returns *sse - sum * sum / (W * H), with the square and the division
 * carried out in 64 bits before narrowing to unsigned int. */
#define OBMC_VAR(W, H)                                           \
  unsigned int aom_obmc_variance##W##x##H##_c(                   \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,   \
      const int32_t *mask, unsigned int *sse) {                  \
    int sum;                                                     \
    int64_t sum_sq;                                              \
    obmc_variance(pre, pre_stride, wsrc, mask, W, H, sse, &sum); \
    sum_sq = (int64_t)sum * sum;                                 \
    return *sse - (unsigned int)(sum_sq / (W * H));              \
  }
1022

clang-format's avatar
clang-format committed
1023
/*
 * Defines aom_obmc_sub_pixel_variance<W>x<H>_c(). The generated function
 * filters the prediction in two bilinear passes and then evaluates
 * aom_obmc_variance<W>x<H>_c() on the filtered block:
 *   - first pass: uses bilinear_filters_2t[xoffset] and writes H + 1 rows of
 *     W 16-bit values into fdata3;
 *   - second pass: uses bilinear_filters_2t[yoffset] and writes H rows of W
 *     8-bit values into temp2.
 * The filtered block is stored contiguously, so it is passed on with a
 * stride of W.
 */
#define OBMC_SUBPIX_VAR(W, H)                                               \
  unsigned int aom_obmc_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *pre, int pre_stride, int xoffset, int yoffset,         \
      const int32_t *wsrc, const int32_t *mask, unsigned int *sse) {        \
    uint16_t fdata3[(H + 1) * W];                                           \
    uint8_t temp2[H * W];                                                   \
                                                                            \
    var_filter_block2d_bil_first_pass(pre, fdata3, pre_stride, 1, H + 1, W, \
                                      bilinear_filters_2t[xoffset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,           \
                                       bilinear_filters_2t[yoffset]);       \
                                                                            \
    return aom_obmc_variance##W##x##H##_c(temp2, W, wsrc, mask, sse);       \
  }

// Instantiate the OBMC variance and sub-pixel variance functions for each
// block size, one OBMC_VAR / OBMC_SUBPIX_VAR pair per size.
OBMC_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 4)

OBMC_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 8)

OBMC_VAR(8, 4)
OBMC_SUBPIX_VAR(8, 4)

OBMC_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 8)

OBMC_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 16)

OBMC_VAR(16, 8)
OBMC_SUBPIX_VAR(16, 8)

OBMC_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 16)

OBMC_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 32)

OBMC_VAR(32, 16)
OBMC_SUBPIX_VAR(32, 16)

OBMC_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 32)

OBMC_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 64)

OBMC_VAR(64, 32)
OBMC_SUBPIX_VAR(64, 32)

OBMC_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 64)

// Block sizes with a 128-pixel dimension are instantiated only when
// CONFIG_EXT_PARTITION is enabled.
#if CONFIG_EXT_PARTITION
OBMC_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 128)

OBMC_VAR(128, 64)
OBMC_SUBPIX_VAR(128, 64)

OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128)
#endif  // CONFIG_EXT_PARTITION

// The 1:4 and 4:1 block shapes are instantiated only when
// CONFIG_EXT_PARTITION_TYPES is enabled.
#if CONFIG_EXT_PARTITION_TYPES
OBMC_VAR(4, 16)
OBMC_SUBPIX_VAR(4, 16)
OBMC_VAR(16, 4)
OBMC_SUBPIX_VAR(16, 4)
OBMC_VAR(8, 32)
OBMC_SUBPIX_VAR(8, 32)
OBMC_VAR(32, 8)
OBMC_SUBPIX_VAR(32, 8)
#endif  // CONFIG_EXT_PARTITION_TYPES

#if CONFIG_HIGHBITDEPTH
/*
 * High bit-depth counterpart of obmc_variance() with 64-bit accumulators.
 *
 * pre8 is converted to a uint16_t sample pointer with CONVERT_TO_SHORTPTR.
 * The error for each pixel is
 *   ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12).
 *
 *   pre8, pre_stride  Prediction samples; rows are pre_stride samples apart.
 *   wsrc, mask        32-bit per-pixel terms, read with a row pitch of w.
 *   w, h              Block width and height in pixels.
 *   sse               Output: sum of squared errors (zeroed before use).
 *   sum               Output: sum of errors (zeroed before use).
 */
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
                                          const int32_t *wsrc,
                                          const int32_t *mask, int w, int h,
                                          uint64_t *sse, int64_t *sum) {
  int i, j;
  uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i++) {
    for (j = 0; j < w; j++) {
      int diff = ROUND_POWER_OF_TWO_SIGNED(wsrc[j] - pre[j] * mask[j], 12);
      *sum += diff;
      // Square in 64 bits so the product cannot overflow int before it is
      // added to the 64-bit accumulator.
      *sse += (uint64_t)((int64_t)diff * diff);
    }

    // pre advances by its own stride; wsrc and mask are packed w per row.
    pre += pre_stride;
    wsrc += w;
    mask += w;
  }
}

/*
 * Wrapper around highbd_obmc_variance64() that narrows the 64-bit totals
 * without any scaling: *sum receives (int)sum64 and *sse receives
 * (unsigned int)sse64.
 */
static INLINE void highbd_obmc_variance(const uint8_t *pre8, int pre_stride,
                                        const int32_t *wsrc,
                                        const int32_t *mask, int w, int h,
                                        unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)sum64;
  *sse = (unsigned int)sse64;
}

/*
 * Wrapper around highbd_obmc_variance64() that scales the totals down before
 * narrowing: the sum is rounded by 2 bits and the sum of squares by 4 bits
 * using ROUND_POWER_OF_TWO.
 *
 * NOTE(review): presumably this brings 10-bit statistics to the 8-bit scale
 * -- confirm. sum64 is signed; confirm ROUND_POWER_OF_TWO behaves as intended
 * for negative totals.
 */
static INLINE void highbd_10_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 4);
}

/*
 * Wrapper around highbd_obmc_variance64() that scales the totals down before
 * narrowing: the sum is rounded by 4 bits and the sum of squares by 8 bits
 * using ROUND_POWER_OF_TWO.
 *
 * NOTE(review): presumably this brings 12-bit statistics to the 8-bit scale
 * -- confirm. sum64 is signed; confirm ROUND_POWER_OF_TWO behaves as intended
 * for negative totals.
 */
static INLINE void highbd_12_obmc_variance(const uint8_t *pre8, int pre_stride,
                                           const int32_t *wsrc,
                                           const int32_t *mask, int w, int h,
                                           unsigned int *sse, int *sum) {
  int64_t sum64;
  uint64_t sse64;
  highbd_obmc_variance64(pre8, pre_stride, wsrc, mask, w, h, &sse64, &sum64);
  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  *sse = (unsigned int)ROUND_POWER_OF_TWO(sse64, 8);
}

#define HIGHBD_OBMC_VAR(W, H)                                              \
Yaowu Xu's avatar
Yaowu Xu committed
1157
  unsigned int aom_highbd_obmc_variance##W##x##H##_c(                      \