aom_dsp_rtcd_defs.pl 109 KB
Newer Older
Johann's avatar
Johann committed
1 2 3 4 5 6 7 8 9 10
##
## Copyright (c) 2017, Alliance for Open Media. All rights reserved
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
Yaowu Xu's avatar
Yaowu Xu committed
11 12 13 14 15 16 17 18
# Prints the banner comment and the #include lines below verbatim.
# Viewer residue (line counters, avatar captions) that had leaked into the
# heredoc has been removed so that it is no longer part of the printed text.
sub aom_dsp_forward_decls() {
print <<EOF
/*
 * DSP
 */

#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/enums.h"
#include "av1/common/blockd.h"

EOF
}
# Register the emitter by name (forward_decls is defined outside this file).
forward_decls qw/aom_dsp_forward_decls/;

# optimizations which depend on multiple features
# 'avx2' only when both HAVE_AVX2 and HAVE_SSSE3 are "yes"; otherwise ''.
$avx2_ssse3 =
  (aom_config("HAVE_AVX2") eq "yes" && aom_config("HAVE_SSSE3") eq "yes") ? 'avx2' : '';

# functions that are 64 bit only.
# Each *_x86_64 variable holds its ISA name when $opts{arch} is x86_64 and ''
# for every other architecture.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
  ($opts{arch} eq "x86_64") ? qw/mmx sse2 ssse3 avx avx2/ : ('') x 5;

# Block edge lengths; 128 is appended only when CONFIG_EXT_PARTITION is "yes".
@block_widths = (4, 8, 16, 32, 64);
push @block_widths, 128 if aom_config("CONFIG_EXT_PARTITION") eq "yes";

# Every [width, height] pair whose sides differ by at most a factor of two,
# ordered by width and then by height.
@block_sizes = ();
foreach my $width (@block_widths) {
  push @block_sizes,
       map  { [$width, $_] }
       grep { $width <= 2 * $_ && $_ <= 2 * $width } @block_widths;
}
54
# With CONFIG_EXT_PARTITION_TYPES, append the 4:1 and 1:4 shapes to @block_sizes.
if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
  push @block_sizes, [4, 16], [16, 4], [8, 32], [32, 8], [16, 64], [64, 16];
  # The shapes with a 128 side are added only when CONFIG_EXT_PARTITION is on too.
  if (aom_config("CONFIG_EXT_PARTITION") eq "yes") {
    push @block_sizes, [32, 128], [128, 32];
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
66

Yaowu Xu's avatar
Yaowu Xu committed
67
# Transform edge lengths.
@tx_dims = (2, 4, 8, 16, 32, 64);

# @tx_sizes receives, for each width in order: the square [w, w], followed by
# every [w, h] with both sides >= 4 whose long side is exactly 2x or 4x the
# short side (heights visited in @tx_dims order).
@tx_sizes = ();
foreach my $w (@tx_dims) {
  push @tx_sizes, [$w, $w];
  next if $w < 4;  # rectangular shapes need both sides >= 4
  foreach my $h (grep { $_ >= 4 } @tx_dims) {
    my ($long, $short) = $w > $h ? ($w, $h) : ($h, $w);
    push @tx_sizes, [$w, $h] if $long == 2 * $short || $long == 4 * $short;
  }
}

77
@pred_names = qw/dc dc_top dc_left dc_128 v h d207e d63e d45e d117 d135 d153 paeth smooth smooth_v smooth_h/;

#
# Intra prediction
#

# For every entry of @tx_sizes and every name in @pred_names, declare two
# prototypes: the uint8_t variant and the uint16_t "highbd" variant, which
# takes an extra bit-depth argument.
foreach (@tx_sizes) {
  ($w, $h) = @$_;
  foreach $pred_name (@pred_names) {
    add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
              "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  }
}
92

93 94 95 96 97 98 99 100
# Per-size architecture lists shared by the 8-bit dc_top, dc_left, dc_128 and
# v predictors: all four families name the same set of architectures for each
# block size.
my @dc_v_pred_archs = (
  ['4x4',   qw/msa neon sse2/],
  ['4x8',   qw/sse2/],
  ['8x4',   qw/sse2/],
  ['8x8',   qw/neon msa sse2/],
  ['8x16',  qw/sse2/],
  ['16x8',  qw/sse2/],
  ['16x16', qw/neon msa sse2/],
  ['16x32', qw/sse2/],
  ['32x16', qw/sse2 avx2/],
  ['32x32', qw/msa neon sse2 avx2/],
);
foreach my $pred (qw/dc_top dc_left dc_128 v/) {
  foreach my $entry (@dc_v_pred_archs) {
    my ($size, @archs) = @$entry;
    specialize "aom_${pred}_predictor_${size}", @archs;
  }
}

# The h predictor has its own table: dspr2 on 4x4/8x8/16x16 and no avx2 for
# 32x16.
specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_4x8 sse2/;
specialize qw/aom_h_predictor_8x4 sse2/;
specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_8x16 sse2/;
specialize qw/aom_h_predictor_16x8 sse2/;
specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_16x32 sse2/;
specialize qw/aom_h_predictor_32x16 sse2/;
specialize qw/aom_h_predictor_32x32 neon msa sse2 avx2/;
143 144 145 146 147
# Paeth predictor: ssse3 for the narrow sizes, ssse3 + avx2 from width 16 up.
# Each function is listed exactly once.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16/) {
  specialize "aom_paeth_predictor_${size}", "ssse3";
}
foreach my $size (qw/16x8 16x16 16x32 32x16 32x32/) {
  specialize "aom_paeth_predictor_${size}", qw/ssse3 avx2/;
}

# Smooth predictor: ssse3 for every listed size.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
  specialize "aom_smooth_predictor_${size}", "ssse3";
}
168

Yaowu Xu's avatar
Yaowu Xu committed
169 170 171 172 173
specialize qw/aom_d135_predictor_4x4 neon/;

# d153: ssse3 for the square sizes 4x4 through 32x32.
foreach my $n (4, 8, 16, 32) {
  specialize "aom_d153_predictor_${n}x${n}", "ssse3";
}

# dc: only the square sizes are specialized here.
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;

# TODO(luoyi): Need to rewrite these.
# specialize qw/aom_dc_predictor_4x8 sse2/;
# specialize qw/aom_dc_predictor_8x4 sse2/;
# specialize qw/aom_dc_predictor_8x16 sse2/;
# specialize qw/aom_dc_predictor_16x8 sse2/;
# specialize qw/aom_dc_predictor_16x32 sse2/;
# specialize qw/aom_dc_predictor_32x16 sse2 avx2/;

189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
# d207e: sse2 for every listed size.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
  specialize "aom_d207e_predictor_${size}", "sse2";
}

# d63e: sse2 for every listed size, plus ssse3 for 4x4 only.
specialize qw/aom_d63e_predictor_4x4 sse2 ssse3/;
foreach my $size (qw/4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
  specialize "aom_d63e_predictor_${size}", "sse2";
}

# d45e: ssse3 for every listed size.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
  specialize "aom_d45e_predictor_${size}", "ssse3";
}
Yaowu Xu's avatar
Yaowu Xu committed
221 222

  # highbd v predictor: sse2 for every listed size.
  foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
    specialize "aom_highbd_v_predictor_${size}", "sse2";
  }

  # highbd dc predictor: only the square sizes are specialized here.
  foreach my $n (4, 8, 16, 32) {
    specialize "aom_highbd_dc_predictor_${n}x${n}", "sse2";
  }

  # TODO(luoyi): Need to rewrite these
  # specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
  # specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
  # specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
  # specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
  # specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
  # specialize qw/aom_highbd_dc_predictor_32x16 sse2/;

246 247 248 249 250 251 252 253 254 255
  # highbd h predictor: sse2 for every listed size.
  foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
    specialize "aom_highbd_h_predictor_${size}", "sse2";
  }

  # highbd dc_left / dc_top / dc_128: sse2 for every listed size, registered
  # size by size in the order left, top, 128.
  foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
    foreach my $pred (qw/dc_left dc_top dc_128/) {
      specialize "aom_highbd_${pred}_predictor_${size}", "sse2";
    }
  }
286

287 288 289 290 291 292 293 294 295 296 297 298
  # highbd d117 / d135 / d153: sse2 for 4x4, ssse3 for the larger squares.
  foreach my $pred (qw/d117 d135 d153/) {
    specialize "aom_highbd_${pred}_predictor_4x4", "sse2";
    foreach my $n (8, 16, 32) {
      specialize "aom_highbd_${pred}_predictor_${n}x${n}", "ssse3";
    }
  }
299 300 301 302 303 304 305 306 307 308 309

  # highbd d45e / d207e / d63e: each row names the predictor, the sizes that
  # get sse2, and the sizes that get avx2. No size appears in both lists.
  my @highbd_dir_e_tables = (
    ['d45e',  [qw/4x4 4x8 8x4 8x8 8x16/],                  [qw/16x8 16x16 16x32 32x16 32x32/]],
    ['d207e', [qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/], [qw/32x16 32x32/]],
    ['d63e',  [qw/4x4 4x8 8x4 8x8 8x16/],                  [qw/16x8 16x16 16x32 32x16 32x32/]],
  );
  foreach my $row (@highbd_dir_e_tables) {
    my ($pred, $sse2_sizes, $avx2_sizes) = @$row;
    foreach my $size (@$sse2_sizes) {
      specialize "aom_highbd_${pred}_predictor_${size}", "sse2";
    }
    foreach my $size (@$avx2_sizes) {
      specialize "aom_highbd_${pred}_predictor_${size}", "avx2";
    }
  }
Yaowu Xu's avatar
Yaowu Xu committed
332 333 334 335

#
# Sub Pixel Filters
#
Fergus Simpson's avatar
Fergus Simpson committed
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
# Argument list shared by the copy/avg/convolve8/scaled prototypes.
my $convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
foreach my $fn (qw/
    aom_convolve_copy
    aom_convolve_avg
    aom_convolve8
    aom_convolve8_horiz
    aom_convolve8_vert
    aom_convolve8_avg
    aom_convolve8_avg_horiz
    aom_convolve8_avg_vert
    aom_scaled_2d
    aom_scaled_horiz
    aom_scaled_vert
    aom_scaled_avg_2d
    aom_scaled_avg_horiz
    aom_scaled_avg_vert
  /) {
  add_proto "void", $fn, $convolve_args;
}

# The *_scale prototypes additionally take subpel_x / subpel_y.
my $convolve_scale_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
foreach my $fn (qw/
    aom_convolve8_horiz_scale
    aom_convolve8_vert_scale
    aom_convolve8_avg_horiz_scale
    aom_convolve8_avg_vert_scale
    aom_convolve8_scale
    aom_convolve8_avg_scale
  /) {
  add_proto "void", $fn, $convolve_scale_args;
}
Yaowu Xu's avatar
Yaowu Xu committed
357 358 359 360 361 362 363 364 365 366 367

# Plain copy/avg: sse2 only.
foreach my $fn (qw/aom_convolve_copy aom_convolve_avg/) {
  specialize $fn, "sse2";
}
# Non-averaging 8-tap kernels: sse2, ssse3, and whatever $avx2_ssse3 holds.
foreach my $fn (qw/aom_convolve8 aom_convolve8_horiz aom_convolve8_vert/) {
  specialize $fn, qw/sse2 ssse3/, "$avx2_ssse3";
}
# Averaging 8-tap kernels: sse2 and ssse3.
foreach my $fn (qw/aom_convolve8_avg aom_convolve8_avg_horiz aom_convolve8_avg_vert/) {
  specialize $fn, qw/sse2 ssse3/;
}
specialize "aom_scaled_2d", "ssse3";

368 369 370 371
# Prototypes and specializations declared only when CONFIG_LOOP_RESTORATION
# is "yes".
if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
  # Argument list shared by the uint8_t -> uint8_t add_src prototypes.
  my $add_src_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

  add_proto qw/void aom_convolve8_add_src/,       $add_src_args;
  add_proto qw/void aom_convolve8_add_src_horiz/, $add_src_args;
  add_proto qw/void aom_convolve8_add_src_vert/,  $add_src_args;
  add_proto qw/void aom_convolve8_add_src_hip/,   $add_src_args;
  # horiz_hip has a uint16_t destination and vert_hip a uint16_t source.
  add_proto qw/void aom_convolve8_add_src_horiz_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_vert_hip/,  "const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

  specialize qw/aom_convolve8_add_src ssse3/;
  specialize qw/aom_convolve8_add_src_horiz ssse3/;
  specialize qw/aom_convolve8_add_src_vert ssse3/;
  specialize qw/aom_convolve8_add_src_hip sse2 avx2/;
}  # CONFIG_LOOP_RESTORATION

Yaowu Xu's avatar
Yaowu Xu committed
383 384 385 386 387 388 389 390 391 392 393 394
# TODO(any): These need to be extended to up to 128x128 block sizes
# Skipped when both CONFIG_AV1 and CONFIG_EXT_PARTITION are "yes".
unless (aom_config("CONFIG_AV1") eq "yes" && aom_config("CONFIG_EXT_PARTITION") eq "yes") {
  foreach my $fn (qw/
      aom_convolve_copy
      aom_convolve_avg
      aom_convolve8
      aom_convolve8_horiz
      aom_convolve8_vert
      aom_convolve8_avg
      aom_convolve8_avg_horiz
      aom_convolve8_avg_vert
    /) {
    specialize $fn, qw/neon dspr2 msa/;
  }
}

395 396
# Argument list shared by every highbd convolve prototype below; it is the
# 8-bit convolve list with a trailing "int bps".
my $highbd_convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";

add_proto qw/void aom_highbd_convolve_copy/, $highbd_convolve_args;
specialize qw/aom_highbd_convolve_copy sse2 avx2/;

add_proto qw/void aom_highbd_convolve_avg/, $highbd_convolve_args;
specialize qw/aom_highbd_convolve_avg sse2 avx2/;

# The 8-tap kernels take avx2 plus whatever $sse2_x86_64 holds.
add_proto qw/void aom_highbd_convolve8/, $highbd_convolve_args;
specialize qw/aom_highbd_convolve8 avx2/, "$sse2_x86_64";

add_proto qw/void aom_highbd_convolve8_horiz/, $highbd_convolve_args;
specialize qw/aom_highbd_convolve8_horiz avx2/, "$sse2_x86_64";

add_proto qw/void aom_highbd_convolve8_vert/, $highbd_convolve_args;
specialize qw/aom_highbd_convolve8_vert avx2/, "$sse2_x86_64";

add_proto qw/void aom_highbd_convolve8_avg/, $highbd_convolve_args;
specialize qw/aom_highbd_convolve8_avg avx2/, "$sse2_x86_64";

add_proto qw/void aom_highbd_convolve8_avg_horiz/, $highbd_convolve_args;
specialize qw/aom_highbd_convolve8_avg_horiz avx2/, "$sse2_x86_64";

add_proto qw/void aom_highbd_convolve8_avg_vert/, $highbd_convolve_args;
specialize qw/aom_highbd_convolve8_avg_vert avx2/, "$sse2_x86_64";

if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
  add_proto qw/void aom_highbd_convolve8_add_src/, $highbd_convolve_args;
  add_proto qw/void aom_highbd_convolve8_add_src_hip/, $highbd_convolve_args;

  specialize qw/aom_highbd_convolve8_add_src/, "$sse2_x86_64";
  specialize qw/aom_highbd_convolve8_add_src_hip ssse3 avx2/;
}  # CONFIG_LOOP_RESTORATION
Yaowu Xu's avatar
Yaowu Xu committed
426 427 428 429

#
# Loopfilter
#
430 431
# Argument lists for the 8-bit loop filters: one blimit/limit/thresh set, or
# two sets (suffix 0/1) for the 8- and 4-tap dual variants. The 14-tap dual
# prototypes are declared with the single-set list.
my $lpf_args = "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
my $lpf_dual_args = "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";

add_proto qw/void aom_lpf_vertical_14/, $lpf_args;
specialize qw/aom_lpf_vertical_14 sse2/;

add_proto qw/void aom_lpf_vertical_14_dual/, $lpf_args;
specialize qw/aom_lpf_vertical_14_dual sse2/;

add_proto qw/void aom_lpf_vertical_6/, $lpf_args;
specialize qw/aom_lpf_vertical_6 sse2/;

add_proto qw/void aom_lpf_vertical_8/, $lpf_args;
specialize qw/aom_lpf_vertical_8 sse2/;

add_proto qw/void aom_lpf_vertical_8_dual/, $lpf_dual_args;

add_proto qw/void aom_lpf_vertical_4/, $lpf_args;
specialize qw/aom_lpf_vertical_4 sse2/;

add_proto qw/void aom_lpf_vertical_4_dual/, $lpf_dual_args;

add_proto qw/void aom_lpf_horizontal_14/, $lpf_args;
specialize qw/aom_lpf_horizontal_14 sse2/;

add_proto qw/void aom_lpf_horizontal_14_dual/, $lpf_args;
specialize qw/aom_lpf_horizontal_14_dual sse2/;

add_proto qw/void aom_lpf_horizontal_6/, $lpf_args;
specialize qw/aom_lpf_horizontal_6 sse2/;

add_proto qw/void aom_lpf_horizontal_8/, $lpf_args;
specialize qw/aom_lpf_horizontal_8 sse2/;

add_proto qw/void aom_lpf_horizontal_8_dual/, $lpf_dual_args;

add_proto qw/void aom_lpf_horizontal_4/, $lpf_args;
specialize qw/aom_lpf_horizontal_4 sse2/;

add_proto qw/void aom_lpf_horizontal_4_dual/, $lpf_dual_args;

468 469
# Argument lists for the highbd loop filters: the samples are uint16_t and a
# trailing "int bd" is appended. The 8- and 4-tap dual variants take two
# blimit/limit/thresh sets (suffix 0/1).
my $highbd_lpf_args = "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
my $highbd_lpf_dual_args = "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";

add_proto qw/void aom_highbd_lpf_vertical_14/, $highbd_lpf_args;
specialize qw/aom_highbd_lpf_vertical_14 sse2/;

# Prototype string kept verbatim, including its irregular spacing.
add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh,   int bd";
specialize qw/aom_highbd_lpf_vertical_14_dual sse2 avx2/;

add_proto qw/void aom_highbd_lpf_vertical_8/, $highbd_lpf_args;
specialize qw/aom_highbd_lpf_vertical_8 sse2/;

add_proto qw/void aom_highbd_lpf_vertical_8_dual/, $highbd_lpf_dual_args;
specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;

add_proto qw/void aom_highbd_lpf_vertical_4/, $highbd_lpf_args;
specialize qw/aom_highbd_lpf_vertical_4 sse2/;

add_proto qw/void aom_highbd_lpf_vertical_4_dual/, $highbd_lpf_dual_args;
specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;

add_proto qw/void aom_highbd_lpf_horizontal_14/, $highbd_lpf_args;
specialize qw/aom_highbd_lpf_horizontal_14 sse2/;

add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, $highbd_lpf_args;
specialize qw/aom_highbd_lpf_horizontal_14_dual sse2 avx2/;

add_proto qw/void aom_highbd_lpf_horizontal_8/, $highbd_lpf_args;
specialize qw/aom_highbd_lpf_horizontal_8 sse2/;

add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, $highbd_lpf_dual_args;
specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;

add_proto qw/void aom_highbd_lpf_horizontal_4/, $highbd_lpf_args;
specialize qw/aom_highbd_lpf_horizontal_4 sse2/;

add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, $highbd_lpf_dual_args;
specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
503

504 505 506 507
#
# Helper functions.
#
# Registers the prototype for av1_round_shift_array and requests an sse4_1
# variant for it.
add_proto("void", "av1_round_shift_array", "int32_t *arr, int size, int bit");
specialize qw/av1_round_shift_array sse4_1/;

Yaowu Xu's avatar
Yaowu Xu committed
508 509 510 511 512 513 514
#
# Encoder functions.
#

#
# Forward transform
#
Yushin Cho's avatar
Yushin Cho committed
515
if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){
516 517 518 519 520 521 522
    # aom_fdct4x4 and aom_fdct4x4_1 take the same argument list and both get
    # an sse2 variant, so they are registered from one loop (proto first, then
    # its specialization, in the original order).
    foreach my $fdct_name (qw/aom_fdct4x4 aom_fdct4x4_1/) {
      add_proto("void", $fdct_name, "const int16_t *input, tran_low_t *output, int stride");
      specialize($fdct_name, "sse2");
    }

    # Prototype only; the specialization for aom_fdct8x8 follows separately.
    add_proto("void", "aom_fdct8x8", "const int16_t *input, tran_low_t *output, int stride");
523
    specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";
524 525 526 527 528 529 530 531 532 533 534

    # Larger forward DCTs: each row is [function name, variants...]. All of
    # them share the (input, output, stride) argument list.
    foreach my $fdct_row (
      [qw/aom_fdct16x16    sse2/],
      [qw/aom_fdct32x32    sse2 avx2/],
      [qw/aom_fdct32x32_rd sse2 avx2/],
    ) {
      my ($fdct_fn, @fdct_isa) = @$fdct_row;
      add_proto("void", $fdct_fn, "const int16_t *input, tran_low_t *output, int stride");
      specialize($fdct_fn, @fdct_isa);
    }

    # High bit depth
535 536
    add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct4x4 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
537

538 539
    add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct8x8 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
540

541 542
    add_proto qw/void aom_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct16x16 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
543

544 545
    add_proto qw/void aom_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
546

547 548
    add_proto qw/void aom_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32_rd sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563

}  # CONFIG_AV1_ENCODER

#
# Inverse transform
if (aom_config("CONFIG_AV1") eq "yes") {
  # Inverse 4x4 WHT entry points. Only aom_iwht4x4_16_add requests a variant
  # (sse2); the other three are registered as prototypes only.
  add_proto("void", "aom_iwht4x4_1_add", "const tran_low_t *input, uint8_t *dest, int dest_stride");

  add_proto("void", "aom_iwht4x4_16_add", "const tran_low_t *input, uint8_t *dest, int dest_stride");
  specialize("aom_iwht4x4_16_add", "sse2");

  # The highbd versions take an additional trailing "int bd" argument.
  add_proto("void", "aom_highbd_iwht4x4_1_add", "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd");

  add_proto("void", "aom_highbd_iwht4x4_16_add", "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd");

James Zern's avatar
James Zern committed
564 565
  add_proto qw/void aom_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct4x4_16_add sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
566

James Zern's avatar
James Zern committed
567 568
  add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct4x4_1_add sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
569

James Zern's avatar
James Zern committed
570 571
  add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_64_add sse2 ssse3/;
Yaowu Xu's avatar
Yaowu Xu committed
572

James Zern's avatar
James Zern committed
573 574
  add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_12_add sse2 ssse3/;
Yaowu Xu's avatar
Yaowu Xu committed
575

James Zern's avatar
James Zern committed
576 577
  add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_1_add sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
578

James Zern's avatar
James Zern committed
579 580
  add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_256_add sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
581

James Zern's avatar
James Zern committed
582 583
  add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_38_add avx2/;
584

James Zern's avatar
James Zern committed
585 586
  add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_10_add sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
587

James Zern's avatar
James Zern committed
588 589
  add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_1_add sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
590

James Zern's avatar
James Zern committed
591 592
  add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
593

James Zern's avatar
James Zern committed
594 595 596 597
  add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2/;
  # Need to add 135 eob idct32x32 implementations.
  $aom_idct32x32_135_add_sse2=aom_idct32x32_1024_add_sse2;
Yaowu Xu's avatar
Yaowu Xu committed
598

James Zern's avatar
James Zern committed
599 600
  add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
601

James Zern's avatar
James Zern committed
602 603
  add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_1_add sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
604 605 606 607 608
}  # CONFIG_AV1

#
# Quantization
#
Thomas Davies's avatar
Thomas Davies committed
609
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
James Zern's avatar
James Zern committed
610 611
  add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
612

James Zern's avatar
James Zern committed
613 614
  add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
615

James Zern's avatar
James Zern committed
616
  add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
Thomas Davies's avatar
Thomas Davies committed
617
}  # CONFIG_AV1_ENCODER
618

Thomas Davies's avatar
Thomas Davies committed
619 620 621
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b sse2 avx2/;
622

Thomas Davies's avatar
Thomas Davies committed
623 624
  add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b_32x32 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
625

Thomas Davies's avatar
Thomas Davies committed
626
  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
627

Thomas Davies's avatar
Thomas Davies committed
628
}  # CONFIG_AV1_ENCODER
Yaowu Xu's avatar
Yaowu Xu committed
629 630 631 632
if (aom_config("CONFIG_AV1") eq "yes") {
  #
  # Alpha blending with mask
  #
633
  add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
Yaowu Xu's avatar
Yaowu Xu committed
634 635 636 637 638 639 640 641 642 643 644 645 646
  # 8-bit blends: the full-mask version carries a mask stride and the
  # suby/subx arguments; the hmask/vmask versions do not.
  add_proto("void", "aom_blend_a64_mask", "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx");
  add_proto("void", "aom_blend_a64_hmask", "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w");
  add_proto("void", "aom_blend_a64_vmask", "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w");
  foreach my $blend_fn (qw/aom_blend_a64_mask aom_blend_a64_hmask aom_blend_a64_vmask/) {
    specialize($blend_fn, "sse4_1");
  }

  # highbd blends: same argument lists with a trailing "int bd".
  add_proto("void", "aom_highbd_blend_a64_mask", "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd");
  add_proto("void", "aom_highbd_blend_a64_hmask", "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd");
  add_proto("void", "aom_highbd_blend_a64_vmask", "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd");
  foreach my $hbd_blend_fn (qw/aom_highbd_blend_a64_mask aom_highbd_blend_a64_hmask aom_highbd_blend_a64_vmask/) {
    specialize($hbd_blend_fn, "sse4_1");
  }
647

Yaowu Xu's avatar
Yaowu Xu committed
648 649
}  # CONFIG_AV1

650
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
651
  #
James Zern's avatar
James Zern committed
652
  # Block subtraction
Yaowu Xu's avatar
Yaowu Xu committed
653
  #
James Zern's avatar
James Zern committed
654 655 656 657 658 659 660 661 662 663 664 665
  add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
  specialize qw/aom_subtract_block neon msa sse2/;

  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    #
    # Sum of Squares
    #
    add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
    specialize qw/aom_sum_squares_2d_i16 sse2/;

    add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
    specialize qw/aom_sum_squares_i16 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
666 667
  }

James Zern's avatar
James Zern committed
668

Yaowu Xu's avatar
Yaowu Xu committed
669
  #
James Zern's avatar
James Zern committed
670
  # Avg
Yaowu Xu's avatar
Yaowu Xu committed
671
  #
James Zern's avatar
James Zern committed
672 673 674 675 676 677 678
  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    #
    # Avg
    #
    # NOTE(review): no add_proto for aom_avg_8x8 is visible near this
    # specialize. Confirm the prototype is declared elsewhere in the file;
    # if it is not, this line probably has no effect and can be dropped.
    specialize qw/aom_avg_8x8 sse2 neon msa/;
      # High bit-depth counterpart of aom_subtract_block: same argument list
      # plus a trailing "int bd". NOTE(review): it sits under the "Avg" banner
      # and is indented one level deeper than its neighbours, which looks like
      # a leftover from a removed enclosing conditional -- confirm placement.
      add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
      specialize qw/aom_highbd_subtract_block sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
679

James Zern's avatar
James Zern committed
680 681 682 683 684 685
    #
    # Minmax
    #
    add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
    specialize qw/aom_minmax_8x8 sse2 neon/;
      add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
Yaowu Xu's avatar
Yaowu Xu committed
686

James Zern's avatar
James Zern committed
687 688
    add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
    specialize qw/aom_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
689

James Zern's avatar
James Zern committed
690 691
    add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
    specialize qw/aom_hadamard_16x16 sse2 neon/;
Yaowu Xu's avatar
Yaowu Xu committed
692

James Zern's avatar
James Zern committed
693 694
    add_proto qw/int aom_satd/, "const int16_t *coeff, int length";
    specialize qw/aom_satd sse2 neon/;
Yaowu Xu's avatar
Yaowu Xu committed
695

James Zern's avatar
James Zern committed
696 697
    add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
    specialize qw/aom_int_pro_row sse2 neon/;
Yaowu Xu's avatar
Yaowu Xu committed
698

James Zern's avatar
James Zern committed
699 700
    add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
    specialize qw/aom_int_pro_col sse2 neon/;
Yaowu Xu's avatar
Yaowu Xu committed
701

James Zern's avatar
James Zern committed
702 703 704
    add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
    specialize qw/aom_vector_var neon sse2/;
  }  # CONFIG_AV1_ENCODER
Yaowu Xu's avatar
Yaowu Xu committed
705

James Zern's avatar
James Zern committed
706 707 708
  #
  # Single block SAD / Single block Avg SAD
  #
Yaowu Xu's avatar
Yaowu Xu committed
709 710
  foreach (@block_sizes) {
    ($w, $h) = @$_;
James Zern's avatar
James Zern committed
711 712
    add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
Cheng Chen's avatar
Cheng Chen committed
713 714 715
    if (aom_config("CONFIG_JNT_COMP") eq "yes") {
      add_proto qw/unsigned int/, "aom_jnt_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
    }
Yaowu Xu's avatar
Yaowu Xu committed
716 717
  }

James Zern's avatar
James Zern committed
718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750
  # Variants requested for the single-block SAD functions. Each row is
  # [WxH, variants...]; rows are processed top to bottom.
  foreach my $sad_row (
    [qw/128x128 avx2          sse2/],
    [qw/128x64  avx2          sse2/],
    [qw/64x128  avx2          sse2/],
    [qw/64x64   avx2 neon msa sse2/],
    [qw/64x32   avx2      msa sse2/],
    [qw/32x64   avx2      msa sse2/],
    [qw/32x32   avx2 neon msa sse2/],
    [qw/32x16   avx2      msa sse2/],
    [qw/16x32             msa sse2/],
    [qw/16x16        neon msa sse2/],
    [qw/16x8         neon msa sse2/],
    [qw/8x16         neon msa sse2/],
    [qw/8x8          neon msa sse2/],
    [qw/8x4               msa sse2/],
    [qw/4x8               msa sse2/],
    [qw/4x4          neon msa sse2/],
  ) {
    my ($sad_size, @sad_isa) = @$sad_row;
    specialize("aom_sad${sad_size}", @sad_isa);
  }

  # Same idea for the "_avg" (second_pred) SAD functions.
  foreach my $avg_row (
    [qw/128x128 avx2     sse2/],
    [qw/128x64  avx2     sse2/],
    [qw/64x128  avx2     sse2/],
    [qw/64x64   avx2 msa sse2/],
    [qw/64x32   avx2 msa sse2/],
    [qw/32x64   avx2 msa sse2/],
    [qw/32x32   avx2 msa sse2/],
    [qw/32x16   avx2 msa sse2/],
    [qw/16x32        msa sse2/],
    [qw/16x16        msa sse2/],
    [qw/16x8         msa sse2/],
    [qw/8x16         msa sse2/],
    [qw/8x8          msa sse2/],
    [qw/8x4          msa sse2/],
    [qw/4x8          msa sse2/],
    [qw/4x4          msa sse2/],
  ) {
    my ($avg_size, @avg_isa) = @$avg_row;
    specialize("aom_sad${avg_size}_avg", @avg_isa);
  }
Yaowu Xu's avatar
Yaowu Xu committed
751

752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767
  # Extra SAD sizes that are only specialized when CONFIG_EXT_PARTITION_TYPES
  # is enabled; all of them request the sse2 variant.
  if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
    my @ext_sad_sizes = qw/4x16 16x4 8x32 32x8 16x64 64x16/;

    # Plain SAD first, then the "_avg" forms, matching the original order.
    specialize("aom_sad$_", "sse2") foreach @ext_sad_sizes;

    specialize("aom_sad${_}_avg", "sse2") foreach @ext_sad_sizes;
  }

768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
  # Everything below is only registered when CONFIG_JNT_COMP is enabled:
  #   * ssse3 variants for the aom_jnt_sadWxH_avg functions, and
  #   * the aom_sadNxh prototypes together with their sse2 variants.
  if (aom_config("CONFIG_JNT_COMP") eq "yes") {
    foreach my $jnt_size (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16
                             16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
      specialize("aom_jnt_sad${jnt_size}_avg", "ssse3");
    }

    # Additional sizes, only with CONFIG_EXT_PARTITION_TYPES.
    if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
      foreach my $jnt_ext_size (qw/4x16 16x4 8x32 32x8 16x64 64x16 32x128 128x32/) {
        specialize("aom_jnt_sad${jnt_ext_size}_avg", "ssse3");
      }
    }

    # aom_sad<N>xh: one prototype per listed width, all with the same
    # argument list. Prototypes are registered first, then the sse2 variants.
    my @xh_widths = (4, 8, 16, 32, 64, 128);

    foreach my $xh_width (@xh_widths) {
      add_proto("unsigned", "int", "aom_sad${xh_width}xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height");
    }

    foreach my $xh_width (@xh_widths) {
      specialize("aom_sad${xh_width}xh", "sse2");
    }
  }

812

Yaowu Xu's avatar
Yaowu Xu committed
813 814
    foreach (@block_sizes) {
      ($w, $h) = @$_;
James Zern's avatar
James Zern committed
815 816 817 818 819 820
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
      if ($w != 128 && $h != 128 && $w != 4) {
        specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
        specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
      }
821 822 823
      if (aom_config("CONFIG_JNT_COMP") eq "yes") {
        add_proto qw/unsigned int/, "aom_highbd_jnt_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const JNT_COMP_PARAMS* jcp_param";
      }
Yaowu Xu's avatar
Yaowu Xu committed
824
    }
James Zern's avatar
James Zern committed
825 826 827
    specialize qw/aom_highbd_sad128x128 avx2/;
    specialize qw/aom_highbd_sad128x64  avx2/;
    specialize qw/aom_highbd_sad64x128  avx2/;
828 829 830 831 832 833 834 835 836
    # highbd SAD sizes that request both avx2 and sse2.
    foreach my $hbd_size (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
      specialize("aom_highbd_sad${hbd_size}", "avx2", "sse2");
    }
    # 8x4 requests sse2 only.
    specialize("aom_highbd_sad8x4", "sse2");
James Zern's avatar
James Zern committed
837 838 839 840

    specialize qw/aom_highbd_sad128x128_avg avx2/;
    specialize qw/aom_highbd_sad128x64_avg  avx2/;
    specialize qw/aom_highbd_sad64x128_avg  avx2/;
841 842 843 844 845 846 847 848 849
    # highbd "_avg" SAD sizes that request both avx2 and sse2.
    foreach my $hbd_avg_size (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
      specialize("aom_highbd_sad${hbd_avg_size}_avg", "avx2", "sse2");
    }
    # 8x4 requests sse2 only.
    specialize("aom_highbd_sad8x4_avg", "sse2");
Yaowu Xu's avatar
Yaowu Xu committed
850

851 852 853 854 855 856 857 858 859 860 861 862 863
    # highbd SAD sizes specialized (sse2) only under CONFIG_EXT_PARTITION_TYPES.
    # NOTE(review): the per-size loop that registers the aom_highbd_sad
    # prototypes already requests sse2 for every size with w != 128,
    # h != 128 and w != 4, which appears to cover all sizes listed here --
    # confirm whether this block is still needed.
    if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
      my @hbd_ext_sizes = qw/16x4 8x32 32x8 16x64 64x16/;

      # Plain SAD first, then the "_avg" forms, matching the original order.
      specialize("aom_highbd_sad$_", "sse2") foreach @hbd_ext_sizes;

      specialize("aom_highbd_sad${_}_avg", "sse2") foreach @hbd_ext_sizes;
    }
James Zern's avatar
James Zern committed
864 865 866
  #
  # Masked SAD
  #
867 868 869 870 871 872
  # One masked-SAD prototype per [w, h] entry of @block_sizes, each with an
  # ssse3 variant.
  foreach (@block_sizes) {
    # $w and $h are deliberately left as the file-level variables used by the
    # other size loops.
    ($w, $h) = @$_;
    my $masked_sad_fn = "aom_masked_sad${w}x${h}";
    add_proto("unsigned", "int", $masked_sad_fn, "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask");
    specialize($masked_sad_fn, "ssse3");
  }

873

James Zern's avatar
James Zern committed
874 875
    foreach (@block_sizes) {
      ($w, $h) = @$_;
876 877
      add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3/;
James Zern's avatar
James Zern committed
878
    }
879

Yaowu Xu's avatar
Yaowu Xu committed
880

James Zern's avatar
James Zern committed
881 882 883
  #
  # OBMC SAD
  #
884 885 886 887 888 889 890 891
  # One OBMC SAD prototype per [w, h] entry of @block_sizes.
  foreach (@block_sizes) {
    # $w and $h are deliberately left as the file-level variables used by the
    # other size loops.
    ($w, $h) = @$_;
    my $obmc_sad_fn = "aom_obmc_sad${w}x${h}";
    add_proto("unsigned", "int", $obmc_sad_fn, "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask");

    # 128x32 and 32x128 are registered without an sse4_1 variant.
    next if ($w == 128 && $h == 32) || ($w == 32 && $h == 128);
    specialize($obmc_sad_fn, "sse4_1");
  }

892

Yaowu Xu's avatar
Yaowu Xu committed
893 894
    foreach (@block_sizes) {
      ($w, $h) = @$_;
895
      add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
896
      if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
897
        specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
James Zern's avatar
James Zern committed
898
      }
Yaowu Xu's avatar
Yaowu Xu committed
899
    }
900

Yaowu Xu's avatar
Yaowu Xu committed
901

James Zern's avatar
James Zern committed
902 903 904 905 906 907 908 909 910 911 912 913
  #
  # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
  #
  # Blocks of 3: one square WxW prototype per entry of @block_widths.
  foreach $s (@block_widths) {
    add_proto("void", "aom_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array");
  }

  # Variants per square size, as [WxW, variants...].
  foreach my $x3_row (
    [qw/64x64            msa/],
    [qw/32x32            msa/],
    [qw/16x16 sse3 ssse3 msa/],
    [qw/8x8   sse3       msa/],
    [qw/4x4   sse3       msa/],
  ) {
    my ($x3_size, @x3_isa) = @$x3_row;
    specialize("aom_sad${x3_size}x3", @x3_isa);
  }
Yaowu Xu's avatar
Yaowu Xu committed
914

James Zern's avatar
James Zern committed
915 916 917 918 919 920
  # Two non-square "x3" SAD functions, registered individually.
  add_proto("void", "aom_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array");
  specialize("aom_sad16x8x3", qw/sse3 ssse3 msa/);
  add_proto("void", "aom_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array");
  specialize("aom_sad8x16x3", qw/sse3 msa/);

  # Blocks of 8
Yaowu Xu's avatar
Yaowu Xu committed
921
  foreach $s (@block_widths) {
James Zern's avatar
James Zern committed
922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938
    add_proto qw/void/, "aom_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }
  # Variants for the square "x8" SAD functions, as [WxW, variants...].
  foreach my $x8_row (
    [qw/64x64        msa/],
    [qw/32x32        msa/],
    [qw/16x16 sse4_1 msa/],
    [qw/8x8   sse4_1 msa/],
    [qw/4x4   sse4_1 msa/],
  ) {
    my ($x8_size, @x8_isa) = @$x8_row;
    specialize("aom_sad${x8_size}x8", @x8_isa);
  }

  # Non-square "x8" SAD functions: prototype followed by its variants.
  foreach my $x8_rect (
    [qw/16x8 sse4_1 msa/],
    [qw/8x16 sse4_1 msa/],
    [qw/8x4         msa/],
    [qw/4x8         msa/],
  ) {
    my ($rect_size, @rect_isa) = @$x8_rect;
    add_proto("void", "aom_sad${rect_size}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array");
    specialize("aom_sad${rect_size}x8", @rect_isa);
  }

939 940

  foreach $s (@block_widths) {
Yaowu Xu's avatar
Yaowu Xu committed
941
    # Blocks of 3
942
    add_proto qw/void/, "aom_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
Yaowu Xu's avatar
Yaowu Xu committed
943
    # Blocks of 8
944
    add_proto qw/void/, "aom_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
Yaowu Xu's avatar
Yaowu Xu committed
945
  }
946 947 948 949 950 951 952 953 954
  # Non-square highbd multi-block SAD prototypes (no variants are requested
  # for them here).
  # Blocks of 3
  foreach my $hbd_x3_size (qw/16x8 8x16/) {
    add_proto("void", "aom_highbd_sad${hbd_x3_size}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array");
  }
  # Blocks of 8
  foreach my $hbd_x8_size (qw/16x8 8x16 8x4 4x8/) {
    add_proto("void", "aom_highbd_sad${hbd_x8_size}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array");
  }

Yaowu Xu's avatar
Yaowu Xu committed
955 956 957 958 959 960

  #
  # Multi-block SAD, comparing a reference to N independent blocks
  #
  # One "x4d" prototype per [width, height] pair in @block_sizes. Unlike the
  # x3/x8 variants, ref_ptr is declared as an array of pointers.
  for my $size (@block_sizes) {
    ($w, $h) = @{$size};
    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
  }

  # Instruction-set list passed to specialize for each aom_sad<W>x<H>x4d.
  # Each row is: block size, then the ISA names in their original order.
  foreach my $row (
    [qw/128x128 avx2          sse2/],
    [qw/128x64  avx2          sse2/],
    [qw/64x128  avx2          sse2/],
    [qw/64x64   avx2 neon msa sse2/],
    [qw/64x32   avx2      msa sse2/],
    [qw/32x64   avx2      msa sse2/],
    [qw/32x32   avx2 neon msa sse2/],
    [qw/32x16             msa sse2/],
    [qw/16x32             msa sse2/],
    [qw/16x16        neon msa sse2/],
    [qw/16x8              msa sse2/],
    [qw/8x16              msa sse2/],
    [qw/8x8               msa sse2/],
    [qw/8x4               msa sse2/],
    [qw/4x8               msa sse2/],
    [qw/4x4               msa sse2/],
  ) {
    my ($dims, @isas) = @{$row};
    specialize "aom_sad${dims}x4d", @isas;
  }

981 982 983 984 985 986 987 988 989
  # Extra 4:1 / 1:4 sizes, only when CONFIG_EXT_PARTITION_TYPES is "yes";
  # each one lists sse2 only.
  if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
    foreach my $dims (qw/4x16 16x4 8x32 32x8 16x64 64x16/) {
      specialize "aom_sad${dims}x4d", qw/sse2/;
    }
  }

James Zern's avatar
James Zern committed
990 991 992 993 994 995 996 997 998
    #
    # Multi-block SAD, comparing a reference to N independent blocks
    #
    # High bit-depth prototypes, one per entry of @block_sizes. sse2 is passed
    # to specialize for every size in which neither dimension is 128.
    for my $size (@block_sizes) {
      ($w, $h) = @{$size};
      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
      next if ($w == 128 || $h == 128);
      specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
    }
James Zern's avatar
James Zern committed
1000 1001 1002
    # Explicit ISA lists for aom_highbd_sad<W>x<H>x4d, grouped by ISA set.
    # The call order matches the size order within each group.

    # avx2 only.
    foreach my $dims (qw/128x128 128x64 64x128/) {
      specialize "aom_highbd_sad${dims}x4d", qw/avx2/;
    }
    # sse2 and avx2.
    foreach my $dims (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
      specialize "aom_highbd_sad${dims}x4d", qw/sse2 avx2/;
    }
    # sse2 only.
    foreach my $dims (qw/8x16 8x8 8x4 4x8 4x4/) {
      specialize "aom_highbd_sad${dims}x4d", qw/sse2/;
    }

    # 4:1 / 1:4 sizes, only when CONFIG_EXT_PARTITION_TYPES is "yes".
    if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
      foreach my $dims (qw/4x16 16x4 8x32 32x8 16x64 64x16/) {
        specialize "aom_highbd_sad${dims}x4d", qw/sse2/;
      }
    }
1025

Yaowu Xu's avatar
Yaowu Xu committed
1026

James Zern's avatar
James Zern committed
1027 1028 1029 1030 1031 1032
  #
  # Structured Similarity (SSIM)
  #
  # Declared only when CONFIG_INTERNAL_STATS is "yes". The 8-bit 8x8 and 16x16
  # variants share one signature and are specialized with $sse2_x86_64 (set to
  # 'sse2' only for x86_64 builds, otherwise empty). The high bit-depth 8x8
  # variant takes uint16_t pixels and is not specialized.
  if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
    foreach my $dims (qw/8x8 16x16/) {
      add_proto qw/void/, "aom_ssim_parms_${dims}", "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
      specialize "aom_ssim_parms_${dims}", "$sse2_x86_64";
    }

    add_proto qw/void/, "aom_highbd_ssim_parms_8x8", "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
  }
1040
}  # CONFIG_AV1_ENCODER
Yaowu Xu's avatar
Yaowu Xu committed
1041

1042
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
1043

James Zern's avatar
James Zern committed
1044 1045 1046 1047
  #
  # Specialty Variance
  #
  # aom_get16x16var / aom_get8x8var: both prototypes take sse and sum output
  # pointers. Prototypes are declared first, then the ISA lists.
  foreach my $dims (qw/16x16 8x8/) {
    add_proto qw/void/, "aom_get${dims}var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
  }

  specialize "aom_get16x16var", qw/sse2 avx2 neon msa/;
  specialize "aom_get8x8var",   qw/sse2      neon msa/;
Yaowu Xu's avatar
Yaowu Xu committed
1053 1054


James Zern's avatar
James Zern committed
1055 1056 1057 1058
  # aom_mse<W>x<H>: identical signature for all four sizes.
  foreach my $dims (qw/16x16 16x8 8x16 8x8/) {
    add_proto qw/unsigned int/, "aom_mse${dims}", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
  }

  # Each row is: block size, then the ISA names in their original order.
  foreach my $row (
    [qw/16x16 sse2 avx2 neon msa/],
    [qw/16x8  sse2           msa/],
    [qw/8x16  sse2           msa/],
    [qw/8x8   sse2           msa/],
  ) {
    my ($dims, @isas) = @{$row};
    specialize "aom_mse${dims}", @isas;
  }
Yaowu Xu's avatar
Yaowu Xu committed
1064

James Zern's avatar
James Zern committed
1065 1066 1067
    # High bit-depth get-var and mse prototypes, repeated for bit depths 8, 10
    # and 12. Only the 16x16 and 8x8 mse functions list sse2.
    foreach $bd (8, 10, 12) {
      my $prefix = "aom_highbd_${bd}_";

      foreach my $dims (qw/16x16 8x8/) {
        add_proto qw/void/, "${prefix}get${dims}var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
      }

      foreach my $dims (qw/16x16 16x8 8x16 8x8/) {
        add_proto qw/unsigned int/, "${prefix}mse${dims}", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
      }

      foreach my $dims (qw/16x16 8x8/) {
        specialize "${prefix}mse${dims}", qw/sse2/;
      }
    }
Yaowu Xu's avatar
Yaowu Xu committed
1077

1078 1079

  #
  # Upsampled prediction (8-bit): aom_upsampled_pred and its comp_avg variants
  #
  add_proto qw/void/, "aom_upsampled_pred", "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
  specialize "aom_upsampled_pred", qw/sse2/;

  add_proto qw/void/, "aom_comp_avg_upsampled_pred", "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
  specialize "aom_comp_avg_upsampled_pred", qw/sse2/;

  # The jnt variant adds a JNT_COMP_PARAMS argument and lists ssse3 rather
  # than sse2; it exists only when CONFIG_JNT_COMP is "yes".
  if (aom_config("CONFIG_JNT_COMP") eq "yes") {
    add_proto qw/void/, "aom_jnt_comp_avg_upsampled_pred", "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride, const JNT_COMP_PARAMS *jcp_param";
    specialize "aom_jnt_comp_avg_upsampled_pred", qw/ssse3/;
  }

1093

1094 1095 1096 1097 1098 1099 1100 1101
  # High bit-depth upsampled prediction: uint16_t output plus a trailing
  # "int bd" argument. All variants, including the jnt one, list sse2.
  add_proto qw/void/, "aom_highbd_upsampled_pred", "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
  specialize "aom_highbd_upsampled_pred", qw/sse2/;
  add_proto qw/void/, "aom_highbd_comp_avg_upsampled_pred", "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
  specialize "aom_highbd_comp_avg_upsampled_pred", qw/sse2/;

  if (aom_config("CONFIG_JNT_COMP") eq "yes") {
    add_proto qw/void/, "aom_highbd_jnt_comp_avg_upsampled_pred", "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd, const JNT_COMP_PARAMS *jcp_param";
    specialize "aom_highbd_jnt_comp_avg_upsampled_pred", qw/sse2/;
  }
Yaowu Xu's avatar
Yaowu Xu committed
1103

1104 1105

  #
  # aom_get_mb_ss / aom_get4x4sse_cs
  #
  add_proto qw/unsigned int/, "aom_get_mb_ss", "const int16_t *";
  add_proto qw/unsigned int/, "aom_get4x4sse_cs", "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";

  specialize "aom_get_mb_ss",    qw/sse2 msa/;
  specialize "aom_get4x4sse_cs", qw/neon msa/;
Yaowu Xu's avatar
Yaowu Xu committed
1113

James Zern's avatar
James Zern committed
1114 1115 1116
  #
  # Variance / Subpixel Variance / Subpixel Avg Variance
  #
  # The 2x2, 2x4 and 4x2 sizes get a plain variance prototype only.
  foreach my $dims (qw/2x2 2x4 4x2/) {
    add_proto qw/unsigned int/, "aom_variance${dims}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  }

  # Every entry of @block_sizes gets variance, sub-pixel variance and
  # sub-pixel avg variance prototypes, plus the jnt variant when
  # CONFIG_JNT_COMP is "yes".
  for my $size (@block_sizes) {
    ($w, $h) = @{$size};
    my $dims = "${w}x${h}";
    add_proto qw/unsigned int/, "aom_variance${dims}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
    add_proto qw/uint32_t/, "aom_sub_pixel_variance${dims}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
    add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${dims}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
    if (aom_config("CONFIG_JNT_COMP") eq "yes") {
      add_proto qw/uint32_t/, "aom_jnt_sub_pixel_avg_variance${dims}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
    }
  }
Yaowu Xu's avatar
Yaowu Xu committed
1132

James Zern's avatar
James Zern committed
1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159
  # ISA lists for aom_variance<W>x<H>.
  # Each row is: block size, then the ISA names in their original order.
  foreach my $row (
    [qw/64x64 sse2 avx2 neon msa/],
    [qw/64x32 sse2 avx2 neon msa/],
    [qw/32x64 sse2      neon msa/],
    [qw/32x32 sse2 avx2 neon msa/],
    [qw/32x16 sse2 avx2      msa/],
    [qw/16x32 sse2           msa/],
    [qw/16x16 sse2 avx2 neon msa/],
    [qw/16x8  sse2      neon msa/],
    [qw/8x16  sse2      neon msa/],
    [qw/8x8   sse2      neon msa/],
    [qw/8x4   sse2           msa/],
    [qw/4x8   sse2           msa/],
    [qw/4x4   sse2           msa/],
  ) {
    my ($dims, @isas) = @{$row};
    specialize "aom_variance${dims}", @isas;
  }

  # ISA lists for aom_sub_pixel_variance<W>x<H>, same row layout.
  foreach my $row (
    [qw/64x64 avx2 neon msa sse2 ssse3/],
    [qw/64x32           msa sse2 ssse3/],
    [qw/32x64           msa sse2 ssse3/],
    [qw/32x32 avx2 neon msa sse2 ssse3/],
    [qw/32x16           msa sse2 ssse3/],
    [qw/16x32           msa sse2 ssse3/],
    [qw/16x16      neon msa sse2 ssse3/],
    [qw/16x8            msa sse2 ssse3/],
    [qw/8x16            msa sse2 ssse3/],
    [qw/8x8        neon msa sse2 ssse3/],
    [qw/8x4             msa sse2 ssse3/],
    [qw/4x8             msa sse2 ssse3/],
    [qw/4x4             msa sse2 ssse3/],
  ) {
    my ($dims, @isas) = @{$row};
    specialize "aom_sub_pixel_variance${dims}", @isas;
  }
1160

James Zern's avatar
James Zern committed
1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194
  # ISA lists for aom_sub_pixel_avg_variance<W>x<H>: every size lists
  # msa, sse2 and ssse3; 64x64 and 32x32 additionally list avx2 first.
  foreach my $dims (qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
    my @isas = qw/msa sse2 ssse3/;
    unshift @isas, "avx2" if ($dims eq "64x64" || $dims eq "32x32");
    specialize "aom_sub_pixel_avg_variance${dims}", @isas;
  }

  # 4:1 / 1:4 sizes, only when CONFIG_EXT_PARTITION_TYPES is "yes".
  # Plain variance lists sse2; both sub-pixel families list sse2 and ssse3.
  if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
    my @ext_dims = qw/4x16 16x4 8x32 32x8 16x64 64x16/;

    foreach my $dims (@ext_dims) {
      specialize "aom_variance${dims}", qw/sse2/;
    }
    foreach my $dims (@ext_dims) {
      specialize "aom_sub_pixel_variance${dims}", qw/sse2 ssse3/;
    }
    foreach my $dims (@ext_dims) {
      specialize "aom_sub_pixel_avg_variance${dims}", qw/sse2 ssse3/;
    }
  }
1195

1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231
  # aom_jnt_sub_pixel_avg_variance<W>x<H>: every enabled size lists ssse3.
  # The size list is assembled first, in the same order as the individual
  # calls it replaces, then walked once.
  if (aom_config("CONFIG_JNT_COMP") eq "yes") {
    my @jnt_dims = qw/64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/;

    if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
      push @jnt_dims, qw/4x16 16x4 8x32 32x8 16x64 64x16/;

      # 4:1 shapes that also need the 128 dimension.
      if (aom_config("CONFIG_EXT_PARTITION") eq "yes") {
        push @jnt_dims, qw/128x32 32x128/;
      }
    }

    if (aom_config("CONFIG_EXT_PARTITION") eq "yes") {
      push @jnt_dims, qw/128x128 128x64 64x128/;
    }

    foreach my $dims (@jnt_dims) {
      specialize "aom_jnt_sub_pixel_avg_variance${dims}", qw/ssse3/;
    }
  }

1232

1233 1234
  # High bit-depth variance family, repeated for bit depths 8, 10 and 12.
  foreach $bd (8, 10, 12) {
    # The 2x2, 2x4 and 4x2 sizes get a plain variance prototype only.
    foreach my $dims (qw/2x2 2x4 4x2/) {
      add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${dims}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
    }

    for my $size (@block_sizes) {
      ($w, $h) = @{$size};
      my $dims    = "${w}x${h}";
      my $has_128 = ($w == 128 || $h == 128);
      my $is_4x4  = ($w == 4 && $h == 4);

      add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${dims}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
      add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${dims}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
      add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${dims}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";

      # Plain variance: sse2 unless a dimension is 128 or 4; 4x4 lists sse4_1.
      # TODO(david.barker): When ext-partition-types is enabled, we currently
      # don't have vectorized 4x16 highbd variance functions
      if (!$has_128 && $w != 4 && $h != 4) {
        specialize "aom_highbd_${bd}_variance${dims}", "sse2";
      }
      if ($is_4x4) {
        specialize "aom_highbd_${bd}_variance${dims}", "sse4_1";
      }

      # Sub-pixel variants: sse2 unless a dimension is 128 or the width is 4
      # (a height of 4 is allowed here); 4x4 lists sse4_1.
      if (!$has_128 && $w != 4) {
        specialize "aom_highbd_${bd}_sub_pixel_variance${dims}", qw/sse2/;
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance${dims}", qw/sse2/;
      }
      if ($is_4x4) {
        specialize "aom_highbd_${bd}_sub_pixel_variance${dims}", "sse4_1";
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance${dims}", "sse4_1";
      }

      if (aom_config("CONFIG_JNT_COMP") eq "yes") {
        add_proto qw/uint32_t/, "aom_highbd_${bd}_jnt_sub_pixel_avg_variance${dims}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const JNT_COMP_PARAMS* jcp_param";
      }
    }
  }
Yaowu Xu's avatar
Yaowu Xu committed
1267

1268 1269 1270 1271 1272 1273 1274 1275
  #
  # Masked Variance / Masked Subpixel Variance
  #
  # One prototype per entry of @block_sizes; every size lists ssse3.
  for my $size (@block_sizes) {
    ($w, $h) = @{$size};
    my $fn = "aom_masked_sub_pixel_variance${w}x${h}";
    add_proto qw/unsigned int/, $fn, "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
    specialize $fn, qw/ssse3/;
  }
Yaowu Xu's avatar
Yaowu Xu committed
1276

1277

1278 1279 1280 1281 1282
    # High bit-depth masked sub-pixel variance. $bd is the infix between
    # "aom_highbd" and "masked", so the names are aom_highbd_8_masked_...,
    # aom_highbd_10_masked_... and aom_highbd_12_masked_...
    foreach $bd ("_8_", "_10_", "_12_") {
      for my $size (@block_sizes) {
        ($w, $h) = @{$size};
        my $fn = "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}";
        add_proto qw/unsigned int/, $fn, "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
        specialize $fn, qw/ssse3/;
      }
    }
1285

Yaowu Xu's avatar
Yaowu Xu committed
1286

James Zern's avatar
James Zern committed
1287 1288 1289
  #
  # OBMC Variance / OBMC Subpixel Variance
  #
  # Both prototypes are declared for every entry of @block_sizes; only the
  # plain OBMC variance lists sse4_1, the sub-pixel form is not specialized.
  for my $size (@block_sizes) {
    ($w, $h) = @{$size};
    my $dims = "${w}x${h}";
    add_proto qw/unsigned int/, "aom_obmc_variance${dims}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
    add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${dims}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
    specialize "aom_obmc_variance${dims}", qw/sse4_1/;
  }
Yaowu Xu's avatar
Yaowu Xu committed
1296

1297

1298 1299 1300 1301 1302 1303
    # High bit-depth OBMC variance. $bd is the infix between "aom_highbd" and
    # "obmc"; the first entry is a bare "_", giving aom_highbd_obmc_...
    # alongside aom_highbd_10_obmc_... and aom_highbd_12_obmc_...
    foreach $bd ("_", "_10_", "_12_") {
      for my $size (@block_sizes) {
        ($w, $h) = @{$size};
        my $dims = "${w}x${h}";
        add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${dims}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
        add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${dims}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
        specialize "aom_highbd${bd}obmc_variance${dims}", qw/sse4_1/;
      }
    }
1306

Yaowu Xu's avatar
Yaowu Xu committed
1307

James Zern's avatar
James Zern committed
1308
  add_proto qw/uint32_t aom_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
Yaowu Xu's avatar
Yaowu Xu committed
1309 1310
  specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;

James Zern's avatar
James Zern committed
1311
  add_proto qw/uint32_t aom_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
Yaowu Xu's avatar
Yaowu Xu committed
1312 1313
  specialize qw/aom_sub_pixel_avg_variance64x32 msa sse2 ssse3/;

James Zern's avatar
James Zern committed
1314
  add_proto qw/uint32_t aom_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
Yaowu Xu's avatar
Yaowu Xu committed
1315 1316
  specialize qw/aom_sub_pixel_avg_variance32x64 msa sse2 ssse3/;

James Zern's avatar
James Zern committed
1317
  add_proto qw/uint32_t aom_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
Yaowu Xu's avatar
Yaowu Xu committed
1318 1319
  specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;

James Zern's avatar
James Zern committed
1320
  add_proto qw/uint32_t aom_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
Yaowu Xu's avatar
Yaowu Xu committed
1321 1322
  specialize qw/aom_sub_pixel_avg_variance32x16 msa sse2 ssse3/;

James Zern's avatar
James Zern committed
1323
  add_proto qw/uint32_t aom_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
Yaowu Xu's avatar
Yaowu Xu committed
1324 1325
  specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;

James Zern's avatar
James Zern committed
1326
  add_proto qw/uint32_t aom_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
Yaowu Xu's avatar
Yaowu Xu committed
1327 1328
  specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;

James Zern's avatar
James Zern committed
1329
  add_proto qw/uint32_t aom_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
Yaowu Xu's avatar
Yaowu Xu committed
1330 1331
  specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;