aom_dsp_rtcd_defs.pl 112 KB
Newer Older
Yaowu Xu's avatar
Yaowu Xu committed
1 2 3 4 5 6 7 8
# Prints the C preamble for this module to the currently selected output
# handle: a "DSP" banner comment followed by four #include lines.
# Takes no arguments; returns whatever print returns.
sub aom_dsp_forward_decls() {
  # Everything between here and EOF is emitted verbatim, so the heredoc must
  # contain nothing but the C text that is meant to reach the output.
  print <<EOF;
/*
 * DSP
 */

#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/enums.h"
#include "av1/common/blockd.h"

EOF
}
forward_decls qw/aom_dsp_forward_decls/;

# optimizations which depend on multiple features
# $avx2_ssse3 is 'avx2' only when both HAVE_AVX2 and HAVE_SSSE3 are "yes";
# otherwise it stays empty.
$avx2_ssse3 = (aom_config("HAVE_AVX2") eq "yes" && aom_config("HAVE_SSSE3") eq "yes")
  ? 'avx2'
  : '';

# functions that are 64 bit only.
# Each *_x86_64 variable holds its instruction-set name when $opts{arch} is
# "x86_64" and the empty string for every other value.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
  ($opts{arch} eq "x86_64") ? (qw/mmx sse2 ssse3 avx avx2/) : ('') x 5;

# Block widths: 4 through 64 always, with 128 appended when
# CONFIG_EXT_PARTITION is "yes".
@block_widths = (4, 8, 16, 32, 64);
push @block_widths, 128 if (aom_config("CONFIG_EXT_PARTITION") eq "yes");

# Collect every [width, height] pair drawn from @block_widths in which
# neither side is more than twice the other, ordered by width, then height.
@block_sizes = ();
foreach my $bw (@block_widths) {
  push @block_sizes,
       map  { [$bw, $_] }
       grep { $bw <= 2 * $_ && $_ <= 2 * $bw } @block_widths;
}
44
# With CONFIG_EXT_PARTITION_TYPES enabled, append the 1:4 and 4:1 shapes to
# @block_sizes. The 32x128 / 128x32 pair is appended only when
# CONFIG_EXT_PARTITION is enabled as well.
if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
  push @block_sizes, [4, 16];
  push @block_sizes, [16, 4];
  push @block_sizes, [8, 32];
  push @block_sizes, [32, 8];
  push @block_sizes, [16, 64];
  push @block_sizes, [64, 16];
  if (aom_config("CONFIG_EXT_PARTITION") eq "yes") {
    push @block_sizes, [32, 128];
    push @block_sizes, [128, 32];
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
56

57 58 59 60
# Transform dimensions: 2 through 32 always, with 64 appended when
# CONFIG_TX64X64 is "yes".
@tx_dims = (2, 4, 8, 16, 32);
push @tx_dims, '64' if (aom_config("CONFIG_TX64X64") eq "yes");
61

62 63 64 65 66 67 68 69
# Build the [width, height] transform sizes. For each dimension the square
# size is added first; dimensions of 4 and above additionally get every 1:2
# or 2:1 partner that is itself at least 4, in @tx_dims order.
@tx_sizes = ();
foreach my $tw (@tx_dims) {
  push @tx_sizes, [$tw, $tw];
  next if $tw < 4;
  push @tx_sizes,
       map  { [$tw, $_] }
       grep { $_ >= 4 && ($tw == 2 * $_ || $_ == 2 * $tw) } @tx_dims;
}

70
@pred_names = qw/dc dc_top dc_left dc_128 v h d207e d63e d45e d117 d135 d153 paeth smooth smooth_v smooth_h/;
71

72 73 74
#
# Intra prediction
#
75

76 77
# Declare one predictor prototype per (transform size, prediction mode)
# pair. When CONFIG_HIGHBITDEPTH is "yes" a matching aom_highbd_* prototype
# is declared too, using uint16_t pixels and an extra bit-depth argument.
foreach (@tx_sizes) {
  # $w and $h are package variables and keep the last size after the loop.
  ($w, $h) = @$_;
  foreach $pred_name (@pred_names) {
    add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
                "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    }
  }
}
87

88 89 90 91 92 93 94 95
specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
specialize qw/aom_dc_top_predictor_4x8 sse2/;
specialize qw/aom_dc_top_predictor_8x4 sse2/;
specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_top_predictor_8x16 sse2/;
specialize qw/aom_dc_top_predictor_16x8 sse2/;
specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_top_predictor_16x32 sse2/;
96 97
specialize qw/aom_dc_top_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_top_predictor_32x32 msa neon sse2 avx2/;
98 99 100 101 102 103 104 105
specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
specialize qw/aom_dc_left_predictor_4x8 sse2/;
specialize qw/aom_dc_left_predictor_8x4 sse2/;
specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_left_predictor_8x16 sse2/;
specialize qw/aom_dc_left_predictor_16x8 sse2/;
specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_left_predictor_16x32 sse2/;
106 107
specialize qw/aom_dc_left_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_left_predictor_32x32 msa neon sse2 avx2/;
108 109 110 111 112 113 114 115
specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;
specialize qw/aom_dc_128_predictor_4x8 sse2/;
specialize qw/aom_dc_128_predictor_8x4 sse2/;
specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_128_predictor_8x16 sse2/;
specialize qw/aom_dc_128_predictor_16x8 sse2/;
specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_128_predictor_16x32 sse2/;
116 117
specialize qw/aom_dc_128_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_128_predictor_32x32 msa neon sse2 avx2/;
118 119 120 121 122 123 124 125
specialize qw/aom_v_predictor_4x4 neon msa sse2/;
specialize qw/aom_v_predictor_4x8 sse2/;
specialize qw/aom_v_predictor_8x4 sse2/;
specialize qw/aom_v_predictor_8x8 neon msa sse2/;
specialize qw/aom_v_predictor_8x16 sse2/;
specialize qw/aom_v_predictor_16x8 sse2/;
specialize qw/aom_v_predictor_16x16 neon msa sse2/;
specialize qw/aom_v_predictor_16x32 sse2/;
126 127
specialize qw/aom_v_predictor_32x16 sse2 avx2/;
specialize qw/aom_v_predictor_32x32 neon msa sse2 avx2/;
128 129 130 131 132 133 134 135 136
specialize qw/aom_h_predictor_4x8 sse2/;
specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_8x4 sse2/;
specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_8x16 sse2/;
specialize qw/aom_h_predictor_16x8 sse2/;
specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_16x32 sse2/;
specialize qw/aom_h_predictor_32x16 sse2/;
137
specialize qw/aom_h_predictor_32x32 neon msa sse2 avx2/;
138 139 140 141 142
specialize qw/aom_paeth_predictor_4x4 ssse3/;
specialize qw/aom_paeth_predictor_4x8 ssse3/;
specialize qw/aom_paeth_predictor_8x4 ssse3/;
specialize qw/aom_paeth_predictor_8x8 ssse3/;
specialize qw/aom_paeth_predictor_8x16 ssse3/;
143 144 145 146 147
specialize qw/aom_paeth_predictor_16x8 ssse3 avx2/;
specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
specialize qw/aom_paeth_predictor_32x16 ssse3 avx2/;
specialize qw/aom_paeth_predictor_32x32 ssse3 avx2/;
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
# NOTE(review): the next five lines name the same paeth functions as the
# "ssse3 avx2" specializations directly above, but list ssse3 only. They look
# like a leftover duplicate; confirm how specialize handles repeated calls
# for one function before removing or keeping them.
specialize qw/aom_paeth_predictor_16x8 ssse3/;
specialize qw/aom_paeth_predictor_16x16 ssse3/;
specialize qw/aom_paeth_predictor_16x32 ssse3/;
specialize qw/aom_paeth_predictor_32x16 ssse3/;
specialize qw/aom_paeth_predictor_32x32 ssse3/;
# Smooth predictors: ssse3 for each listed size, 4x4 through 32x32.
specialize qw/aom_smooth_predictor_4x4 ssse3/;
specialize qw/aom_smooth_predictor_4x8 ssse3/;
specialize qw/aom_smooth_predictor_8x4 ssse3/;
specialize qw/aom_smooth_predictor_8x8 ssse3/;
specialize qw/aom_smooth_predictor_8x16 ssse3/;
specialize qw/aom_smooth_predictor_16x8 ssse3/;
specialize qw/aom_smooth_predictor_16x16 ssse3/;
specialize qw/aom_smooth_predictor_16x32 ssse3/;
specialize qw/aom_smooth_predictor_32x16 ssse3/;
specialize qw/aom_smooth_predictor_32x32 ssse3/;
163

Yaowu Xu's avatar
Yaowu Xu committed
164 165 166
specialize qw/aom_d135_predictor_4x4 neon/;
specialize qw/aom_d153_predictor_4x4 ssse3/;
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
167
specialize qw/aom_dc_predictor_4x8 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
168
specialize qw/aom_d153_predictor_8x8 ssse3/;
169
specialize qw/aom_dc_predictor_8x4 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
170
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
171
specialize qw/aom_dc_predictor_8x16 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
172
specialize qw/aom_d153_predictor_16x16 ssse3/;
173
specialize qw/aom_dc_predictor_16x8 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
174
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
175
specialize qw/aom_dc_predictor_16x32 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
176
specialize qw/aom_d153_predictor_32x32 ssse3/;
177

178 179
specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
specialize qw/aom_d207e_predictor_4x4 sse2/;
specialize qw/aom_d207e_predictor_4x8 sse2/;
specialize qw/aom_d207e_predictor_8x4 sse2/;
specialize qw/aom_d207e_predictor_8x8 sse2/;
specialize qw/aom_d207e_predictor_8x16 sse2/;
specialize qw/aom_d207e_predictor_16x8 sse2/;
specialize qw/aom_d207e_predictor_16x16 sse2/;
specialize qw/aom_d207e_predictor_16x32 sse2/;
specialize qw/aom_d207e_predictor_32x16 sse2/;
specialize qw/aom_d207e_predictor_32x32 sse2/;

specialize qw/aom_d63e_predictor_4x4 sse2 ssse3/;
specialize qw/aom_d63e_predictor_4x8 sse2/;
specialize qw/aom_d63e_predictor_8x4 sse2/;
specialize qw/aom_d63e_predictor_8x8 sse2/;
specialize qw/aom_d63e_predictor_8x16 sse2/;
specialize qw/aom_d63e_predictor_16x8 sse2/;
specialize qw/aom_d63e_predictor_16x16 sse2/;
specialize qw/aom_d63e_predictor_16x32 sse2/;
specialize qw/aom_d63e_predictor_32x16 sse2/;
specialize qw/aom_d63e_predictor_32x32 sse2/;

specialize qw/aom_d45e_predictor_4x4 ssse3/;
specialize qw/aom_d45e_predictor_4x8 ssse3/;
specialize qw/aom_d45e_predictor_8x4 ssse3/;
specialize qw/aom_d45e_predictor_8x8 ssse3/;
specialize qw/aom_d45e_predictor_8x16 ssse3/;
specialize qw/aom_d45e_predictor_16x8 ssse3/;
specialize qw/aom_d45e_predictor_16x16 ssse3/;
specialize qw/aom_d45e_predictor_16x32 ssse3/;
specialize qw/aom_d45e_predictor_32x16 ssse3/;
specialize qw/aom_d45e_predictor_32x32 ssse3/;
Yaowu Xu's avatar
Yaowu Xu committed
212

213
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
214
  specialize qw/aom_highbd_v_predictor_4x4 sse2/;
215 216
  specialize qw/aom_highbd_v_predictor_4x8 sse2/;
  specialize qw/aom_highbd_v_predictor_8x4 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
217
  specialize qw/aom_highbd_v_predictor_8x8 sse2/;
218 219
  specialize qw/aom_highbd_v_predictor_8x16 sse2/;
  specialize qw/aom_highbd_v_predictor_16x8 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
220
  specialize qw/aom_highbd_v_predictor_16x16 sse2/;
221 222
  specialize qw/aom_highbd_v_predictor_16x32 sse2/;
  specialize qw/aom_highbd_v_predictor_32x16 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
223
  specialize qw/aom_highbd_v_predictor_32x32 sse2/;
224 225 226 227 228 229 230 231 232
  # High-bitdepth DC predictors: sse2 for each listed size.
  specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
  specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
  specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
  specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
  specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
  specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
  specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
  specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
  specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
233
  specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
234 235 236 237 238 239 240 241 242 243
  specialize qw/aom_highbd_h_predictor_4x4 sse2/;
  specialize qw/aom_highbd_h_predictor_4x8 sse2/;
  specialize qw/aom_highbd_h_predictor_8x4 sse2/;
  specialize qw/aom_highbd_h_predictor_8x8 sse2/;
  specialize qw/aom_highbd_h_predictor_8x16 sse2/;
  specialize qw/aom_highbd_h_predictor_16x8 sse2/;
  specialize qw/aom_highbd_h_predictor_16x16 sse2/;
  specialize qw/aom_highbd_h_predictor_16x32 sse2/;
  specialize qw/aom_highbd_h_predictor_32x16 sse2/;
  specialize qw/aom_highbd_h_predictor_32x32 sse2/;
244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
  specialize qw/aom_highbd_dc_left_predictor_4x4 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_4x4 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_4x4 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_4x8 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_4x8 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_4x8 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_8x4 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_8x4 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_8x4 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_8x8 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_8x8 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_8x8 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_8x16 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_8x16 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_8x16 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_16x8 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_16x8 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_16x8 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_16x16 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_16x16 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_16x16 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_16x32 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_16x32 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_16x32 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_32x16 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_32x16 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_32x16 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_32x32 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_32x32 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_32x32 sse2/;
274 275 276 277 278 279 280 281 282 283 284 285 286
  
  specialize qw/aom_highbd_d117_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d117_predictor_8x8 ssse3/;
  specialize qw/aom_highbd_d117_predictor_16x16 ssse3/;
  specialize qw/aom_highbd_d117_predictor_32x32 ssse3/;
  specialize qw/aom_highbd_d135_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d135_predictor_8x8 ssse3/;
  specialize qw/aom_highbd_d135_predictor_16x16 ssse3/;
  specialize qw/aom_highbd_d135_predictor_32x32 ssse3/;
  specialize qw/aom_highbd_d153_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d153_predictor_8x8 ssse3/;
  specialize qw/aom_highbd_d153_predictor_16x16 ssse3/;
  specialize qw/aom_highbd_d153_predictor_32x32 ssse3/;
287 288 289 290 291 292 293 294 295 296 297

  specialize qw/aom_highbd_d45e_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d45e_predictor_4x8 sse2/;
  specialize qw/aom_highbd_d45e_predictor_8x4 sse2/;
  specialize qw/aom_highbd_d45e_predictor_8x8 sse2/;
  specialize qw/aom_highbd_d45e_predictor_8x16 sse2/;
  specialize qw/aom_highbd_d45e_predictor_16x8 avx2/;
  specialize qw/aom_highbd_d45e_predictor_16x16 avx2/;
  specialize qw/aom_highbd_d45e_predictor_16x32 avx2/;
  specialize qw/aom_highbd_d45e_predictor_32x16 avx2/;
  specialize qw/aom_highbd_d45e_predictor_32x32 avx2/;
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319

  specialize qw/aom_highbd_d207e_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d207e_predictor_4x8 sse2/;
  specialize qw/aom_highbd_d207e_predictor_8x4 sse2/;
  specialize qw/aom_highbd_d207e_predictor_8x8 sse2/;
  specialize qw/aom_highbd_d207e_predictor_8x16 sse2/;
  specialize qw/aom_highbd_d207e_predictor_16x8 sse2/;
  specialize qw/aom_highbd_d207e_predictor_16x16 sse2/;
  specialize qw/aom_highbd_d207e_predictor_16x32 sse2/;
  specialize qw/aom_highbd_d207e_predictor_32x16 avx2/;
  specialize qw/aom_highbd_d207e_predictor_32x32 avx2/;

  specialize qw/aom_highbd_d63e_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d63e_predictor_4x8 sse2/;
  specialize qw/aom_highbd_d63e_predictor_8x4 sse2/;
  specialize qw/aom_highbd_d63e_predictor_8x8 sse2/;
  specialize qw/aom_highbd_d63e_predictor_8x16 sse2/;
  specialize qw/aom_highbd_d63e_predictor_16x8 avx2/;
  specialize qw/aom_highbd_d63e_predictor_16x16 avx2/;
  specialize qw/aom_highbd_d63e_predictor_16x32 avx2/;
  specialize qw/aom_highbd_d63e_predictor_32x16 avx2/;
  specialize qw/aom_highbd_d63e_predictor_32x32 avx2/;
320
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
321 322 323 324

#
# Sub Pixel Filters
#
Fergus Simpson's avatar
Fergus Simpson committed
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
# Convolve / scale prototypes that all share one parameter list. Each call
# passes the same three arguments as before: return type, name, parameters.
foreach my $fn (qw/aom_convolve_copy
                   aom_convolve_avg
                   aom_convolve8
                   aom_convolve8_horiz
                   aom_convolve8_vert
                   aom_convolve8_avg
                   aom_convolve8_avg_horiz
                   aom_convolve8_avg_vert
                   aom_scaled_2d
                   aom_scaled_horiz
                   aom_scaled_vert
                   aom_scaled_avg_2d
                   aom_scaled_avg_horiz
                   aom_scaled_avg_vert/) {
  add_proto("void", $fn, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h");
}

# The *_scale variants take an extra subpel_x / subpel_y pair.
foreach my $fn (qw/aom_convolve8_horiz_scale
                   aom_convolve8_vert_scale
                   aom_convolve8_avg_horiz_scale
                   aom_convolve8_avg_vert_scale
                   aom_convolve8_scale
                   aom_convolve8_avg_scale/) {
  add_proto("void", $fn, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h");
}
Yaowu Xu's avatar
Yaowu Xu committed
346 347 348 349 350 351 352 353 354 355 356

specialize qw/aom_convolve_copy       sse2      /;
specialize qw/aom_convolve_avg        sse2      /;
specialize qw/aom_convolve8           sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_horiz     sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_vert      sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_avg       sse2 ssse3/;
specialize qw/aom_convolve8_avg_horiz sse2 ssse3/;
specialize qw/aom_convolve8_avg_vert  sse2 ssse3/;
specialize qw/aom_scaled_2d                ssse3/;

357 358 359 360
if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
  add_proto qw/void aom_convolve8_add_src/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_vert/,  "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
361 362 363
  add_proto qw/void aom_convolve8_add_src_hip/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_horiz_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_vert_hip/,  "const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
364

365 366 367
  specialize qw/aom_convolve8_add_src ssse3/;
  specialize qw/aom_convolve8_add_src_horiz ssse3/;
  specialize qw/aom_convolve8_add_src_vert ssse3/;
368
  specialize qw/aom_convolve8_add_src_hip sse2/;
369 370
}  # CONFIG_LOOP_RESTORATION

Yaowu Xu's avatar
Yaowu Xu committed
371 372 373 374 375 376 377 378 379 380 381 382
# TODO(any): Extend these to cover block sizes up to 128x128.
# The neon/dspr2/msa variants are registered unless both CONFIG_AV1 and
# CONFIG_EXT_PARTITION are "yes".
if (aom_config("CONFIG_AV1") ne "yes" || aom_config("CONFIG_EXT_PARTITION") ne "yes") {
  foreach my $fn (qw/aom_convolve_copy
                     aom_convolve_avg
                     aom_convolve8
                     aom_convolve8_horiz
                     aom_convolve8_vert
                     aom_convolve8_avg
                     aom_convolve8_avg_horiz
                     aom_convolve8_avg_vert/) {
    specialize($fn, qw/neon dspr2 msa/);
  }
}

383
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
384
  add_proto qw/void aom_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
385
  specialize qw/aom_highbd_convolve_copy sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
386 387

  add_proto qw/void aom_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
388
  specialize qw/aom_highbd_convolve_avg sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
389 390

  add_proto qw/void aom_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
391
  specialize qw/aom_highbd_convolve8 avx2/, "$sse2_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
392 393

  add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
394
  specialize qw/aom_highbd_convolve8_horiz avx2/, "$sse2_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
395 396

  add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
397
  specialize qw/aom_highbd_convolve8_vert avx2/, "$sse2_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
398 399

  add_proto qw/void aom_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
400
  specialize qw/aom_highbd_convolve8_avg avx2/, "$sse2_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
401 402

  add_proto qw/void aom_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
403
  specialize qw/aom_highbd_convolve8_avg_horiz avx2/, "$sse2_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
404 405

  add_proto qw/void aom_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
406
  specialize qw/aom_highbd_convolve8_avg_vert avx2/, "$sse2_x86_64";
407 408 409

  if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
    add_proto qw/void aom_highbd_convolve8_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
410
    add_proto qw/void aom_highbd_convolve8_add_src_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
411

412
    specialize qw/aom_highbd_convolve8_add_src/, "$sse2_x86_64";
413
    specialize qw/aom_highbd_convolve8_add_src_hip ssse3/;
414
  }  # CONFIG_LOOP_RESTORATION
415
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
416 417 418 419 420

#
# Loopfilter
#
add_proto qw/void aom_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
421 422 423 424 425 426
# aom_lpf_vertical_16: sse2 only when CONFIG_PARALLEL_DEBLOCKING is "yes";
# otherwise sse2, neon_asm, dspr2 and msa.
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_16 sse2/;
} else {
  specialize qw/aom_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
  # Presumably maps the neon_asm variant onto the *_neon symbol - confirm
  # against the generator. Quoted so the value is an explicit string rather
  # than an unquoted bareword.
  $aom_lpf_vertical_16_neon_asm = 'aom_lpf_vertical_16_neon';
}
Yaowu Xu's avatar
Yaowu Xu committed
427 428

add_proto qw/void aom_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
429 430 431 432
# aom_lpf_vertical_16_dual is specialized only when
# CONFIG_PARALLEL_DEBLOCKING is not "yes".
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
  # Presumably maps the neon_asm variant onto the *_neon symbol - confirm
  # against the generator. Quoted so the value is an explicit string rather
  # than an unquoted bareword.
  $aom_lpf_vertical_16_dual_neon_asm = 'aom_lpf_vertical_16_dual_neon';
}
Yaowu Xu's avatar
Yaowu Xu committed
433 434

add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
435 436 437 438 439
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_8 sse2/;
} else {
  specialize qw/aom_lpf_vertical_8 sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
440 441

add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
442 443 444 445
# aom_lpf_vertical_8_dual is specialized only when
# CONFIG_PARALLEL_DEBLOCKING is not "yes".
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
  # Presumably maps the neon_asm variant onto the *_neon symbol - confirm
  # against the generator. Quoted so the value is an explicit string rather
  # than an unquoted bareword.
  $aom_lpf_vertical_8_dual_neon_asm = 'aom_lpf_vertical_8_dual_neon';
}
Yaowu Xu's avatar
Yaowu Xu committed
446 447

add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
448 449 450 451 452
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_4 sse2/;
} else {
  specialize qw/aom_lpf_vertical_4 sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
453 454

add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
455 456 457
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_4_dual sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
458 459

add_proto qw/void aom_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
460 461 462 463 464 465
# aom_lpf_horizontal_edge_8: sse2 only when CONFIG_PARALLEL_DEBLOCKING is
# "yes"; otherwise sse2, avx2, neon_asm, dspr2 and msa.
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_edge_8 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
  # Presumably maps the neon_asm variant onto the *_neon symbol - confirm
  # against the generator. Quoted so the value is an explicit string rather
  # than an unquoted bareword.
  $aom_lpf_horizontal_edge_8_neon_asm = 'aom_lpf_horizontal_edge_8_neon';
}
Yaowu Xu's avatar
Yaowu Xu committed
466 467

add_proto qw/void aom_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
468 469 470 471 472 473
# aom_lpf_horizontal_edge_16: sse2 only when CONFIG_PARALLEL_DEBLOCKING is
# "yes"; otherwise sse2, avx2, neon_asm, dspr2 and msa.
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_edge_16 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
  # Presumably maps the neon_asm variant onto the *_neon symbol - confirm
  # against the generator. Quoted so the value is an explicit string rather
  # than an unquoted bareword.
  $aom_lpf_horizontal_edge_16_neon_asm = 'aom_lpf_horizontal_edge_16_neon';
}
Yaowu Xu's avatar
Yaowu Xu committed
474 475

add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
476 477 478 479 480
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_8 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_8 sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
481 482

add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
483 484 485 486
# aom_lpf_horizontal_8_dual is specialized only when
# CONFIG_PARALLEL_DEBLOCKING is not "yes".
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
  # Presumably maps the neon_asm variant onto the *_neon symbol - confirm
  # against the generator. Quoted so the value is an explicit string rather
  # than an unquoted bareword.
  $aom_lpf_horizontal_8_dual_neon_asm = 'aom_lpf_horizontal_8_dual_neon';
}
Yaowu Xu's avatar
Yaowu Xu committed
487 488

add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
489 490 491 492 493
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_4 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_4 sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
494 495

add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
496 497 498
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
499

500
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  # High bit-depth loop filters. Only two C signatures occur in this group:
  # one with a single blimit/limit/thresh triple and one with two triples.
  # Note that aom_highbd_lpf_vertical_16_dual uses the single-triple form.
  my $hbd_lpf_args      = "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  my $hbd_lpf_dual_args = "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";

  # Vertical filters.
  add_proto qw/void aom_highbd_lpf_vertical_16/, "$hbd_lpf_args";
  specialize qw/aom_highbd_lpf_vertical_16 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_16_dual/, "$hbd_lpf_args";
  specialize qw/aom_highbd_lpf_vertical_16_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_8/, "$hbd_lpf_args";
  specialize qw/aom_highbd_lpf_vertical_8 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "$hbd_lpf_dual_args";
  specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_4/, "$hbd_lpf_args";
  specialize qw/aom_highbd_lpf_vertical_4 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "$hbd_lpf_dual_args";
  specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;

  # Horizontal filters.
  add_proto qw/void aom_highbd_lpf_horizontal_edge_8/, "$hbd_lpf_args";
  specialize qw/aom_highbd_lpf_horizontal_edge_8 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_edge_16/, "$hbd_lpf_args";
  specialize qw/aom_highbd_lpf_horizontal_edge_16 sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8/, "$hbd_lpf_args";
  specialize qw/aom_highbd_lpf_horizontal_8 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "$hbd_lpf_dual_args";
  specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4/, "$hbd_lpf_args";
  specialize qw/aom_highbd_lpf_horizontal_4 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "$hbd_lpf_dual_args";
  specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
537 538 539 540 541 542 543 544

#
# Encoder functions.
#

#
# Forward transform
#
Yushin Cho's avatar
Yushin Cho committed
545
if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){
  # Every forward transform declared here shares one C signature.
  my $fdct_args = "const int16_t *input, tran_low_t *output, int stride";

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    # With high bit-depth enabled only x86 variants are registered.
    add_proto qw/void aom_fdct4x4/, "$fdct_args";
    specialize qw/aom_fdct4x4 sse2/;

    add_proto qw/void aom_fdct4x4_1/, "$fdct_args";
    specialize qw/aom_fdct4x4_1 sse2/;

    add_proto qw/void aom_fdct8x8/, "$fdct_args";
    specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";

    add_proto qw/void aom_fdct16x16/, "$fdct_args";
    specialize qw/aom_fdct16x16 sse2/;

    add_proto qw/void aom_fdct32x32/, "$fdct_args";
    specialize qw/aom_fdct32x32 sse2 avx2/;

    add_proto qw/void aom_fdct32x32_rd/, "$fdct_args";
    specialize qw/aom_fdct32x32_rd sse2 avx2/;

    # High bit depth
    add_proto qw/void aom_highbd_fdct4x4/, "$fdct_args";
    specialize qw/aom_highbd_fdct4x4 sse2/;

    add_proto qw/void aom_highbd_fdct8x8/, "$fdct_args";
    specialize qw/aom_highbd_fdct8x8 sse2/;

    add_proto qw/void aom_highbd_fdct16x16/, "$fdct_args";
    specialize qw/aom_highbd_fdct16x16 sse2/;

    add_proto qw/void aom_highbd_fdct32x32/, "$fdct_args";
    specialize qw/aom_highbd_fdct32x32 sse2/;

    add_proto qw/void aom_highbd_fdct32x32_rd/, "$fdct_args";
    specialize qw/aom_highbd_fdct32x32_rd sse2/;
  } else {
    # Without high bit-depth the msa (and, for 8x8, neon) variants are
    # registered in addition to the x86 ones.
    add_proto qw/void aom_fdct4x4/, "$fdct_args";
    specialize qw/aom_fdct4x4 sse2 msa/;

    add_proto qw/void aom_fdct4x4_1/, "$fdct_args";
    specialize qw/aom_fdct4x4_1 sse2/;

    add_proto qw/void aom_fdct8x8/, "$fdct_args";
    specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";

    add_proto qw/void aom_fdct16x16/, "$fdct_args";
    specialize qw/aom_fdct16x16 sse2 msa/;

    add_proto qw/void aom_fdct32x32/, "$fdct_args";
    specialize qw/aom_fdct32x32 sse2 avx2 msa/;

    add_proto qw/void aom_fdct32x32_rd/, "$fdct_args";
    specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
  }  # CONFIG_HIGHBITDEPTH
}  # CONFIG_AV1_ENCODER

#
# Inverse transform
#
if (aom_config("CONFIG_AV1") eq "yes") {
  # All 8-bit inverse transforms share one C signature; the highbd WHT
  # variants take an extra bit-depth argument.
  my $idct_args = "const tran_low_t *input, uint8_t *dest, int dest_stride";

  # Baseline registrations, made regardless of CONFIG_HIGHBITDEPTH
  # (x86 variants only).
  add_proto qw/void aom_iwht4x4_1_add/, "$idct_args";

  add_proto qw/void aom_iwht4x4_16_add/, "$idct_args";
  specialize qw/aom_iwht4x4_16_add sse2/;

  add_proto qw/void aom_highbd_iwht4x4_1_add/, "$idct_args, int bd";

  add_proto qw/void aom_highbd_iwht4x4_16_add/, "$idct_args, int bd";

  add_proto qw/void aom_idct4x4_16_add/, "$idct_args";
  specialize qw/aom_idct4x4_16_add sse2/;

  add_proto qw/void aom_idct4x4_1_add/, "$idct_args";
  specialize qw/aom_idct4x4_1_add sse2/;

  add_proto qw/void aom_idct8x8_64_add/, "$idct_args";
  specialize qw/aom_idct8x8_64_add sse2 ssse3/;

  add_proto qw/void aom_idct8x8_12_add/, "$idct_args";
  specialize qw/aom_idct8x8_12_add sse2 ssse3/;

  add_proto qw/void aom_idct8x8_1_add/, "$idct_args";
  specialize qw/aom_idct8x8_1_add sse2/;

  add_proto qw/void aom_idct16x16_256_add/, "$idct_args";
  specialize qw/aom_idct16x16_256_add sse2 avx2/;

  add_proto qw/void aom_idct16x16_38_add/, "$idct_args";
  specialize qw/aom_idct16x16_38_add avx2/;

  add_proto qw/void aom_idct16x16_10_add/, "$idct_args";
  specialize qw/aom_idct16x16_10_add sse2 avx2/;

  add_proto qw/void aom_idct16x16_1_add/, "$idct_args";
  specialize qw/aom_idct16x16_1_add sse2 avx2/;

  add_proto qw/void aom_idct32x32_1024_add/, "$idct_args";
  specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2/;

  add_proto qw/void aom_idct32x32_135_add/, "$idct_args";
  specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2/;
  # Need to add 135 eob idct32x32 implementations.
  $aom_idct32x32_135_add_sse2 = "aom_idct32x32_1024_add_sse2";

  add_proto qw/void aom_idct32x32_34_add/, "$idct_args";
  specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2/;

  add_proto qw/void aom_idct32x32_1_add/, "$idct_args";
  specialize qw/aom_idct32x32_1_add sse2 avx2/;

  # Without high bit-depth the same functions are registered again with the
  # additional neon/dspr2/msa variants.
  if (aom_config("CONFIG_HIGHBITDEPTH") ne "yes") {
    add_proto qw/void aom_idct4x4_1_add/, "$idct_args";
    specialize qw/aom_idct4x4_1_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct4x4_16_add/, "$idct_args";
    specialize qw/aom_idct4x4_16_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_1_add/, "$idct_args";
    specialize qw/aom_idct8x8_1_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_64_add/, "$idct_args";
    specialize qw/aom_idct8x8_64_add sse2 ssse3 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_12_add/, "$idct_args";
    specialize qw/aom_idct8x8_12_add sse2 ssse3 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_1_add/, "$idct_args";
    specialize qw/aom_idct16x16_1_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_256_add/, "$idct_args";
    specialize qw/aom_idct16x16_256_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_38_add/, "$idct_args";
    specialize qw/aom_idct16x16_38_add avx2/;

    add_proto qw/void aom_idct16x16_10_add/, "$idct_args";
    specialize qw/aom_idct16x16_10_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct32x32_1024_add/, "$idct_args";
    specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct32x32_135_add/, "$idct_args";
    specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2 neon dspr2 msa/;
    # Need to add 135 eob idct32x32 implementations.
    $aom_idct32x32_135_add_sse2  = "aom_idct32x32_1024_add_sse2";
    $aom_idct32x32_135_add_neon  = "aom_idct32x32_1024_add_neon";
    $aom_idct32x32_135_add_dspr2 = "aom_idct32x32_1024_add_dspr2";
    $aom_idct32x32_135_add_msa   = "aom_idct32x32_1024_add_msa";

    add_proto qw/void aom_idct32x32_34_add/, "$idct_args";
    specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2 neon dspr2 msa/;
    # Need to add 34 eob idct32x32 neon implementation.
    $aom_idct32x32_34_add_neon = "aom_idct32x32_1024_add_neon";

    add_proto qw/void aom_idct32x32_1_add/, "$idct_args";
    specialize qw/aom_idct32x32_1_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_iwht4x4_1_add/, "$idct_args";
    specialize qw/aom_iwht4x4_1_add msa/;

    add_proto qw/void aom_iwht4x4_16_add/, "$idct_args";
    specialize qw/aom_iwht4x4_16_add msa sse2/;
  }  # !CONFIG_HIGHBITDEPTH
}  # CONFIG_AV1

#
# Quantization
#
Thomas Davies's avatar
Thomas Davies committed
713
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  # The three quantizers below share one C signature.
  my $quantize_b_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

  add_proto qw/void aom_quantize_b/, "$quantize_b_args";
  specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";

  add_proto qw/void aom_quantize_b_32x32/, "$quantize_b_args";
  specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";

  # No SIMD variant is registered for the 64x64 quantizer here.
  add_proto qw/void aom_quantize_b_64x64/, "$quantize_b_args";
}  # CONFIG_AV1_ENCODER
722

Thomas Davies's avatar
Thomas Davies committed
723 724 725
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  # The three high bit-depth quantizers below share one C signature.
  my $highbd_quantize_b_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

  add_proto qw/void aom_highbd_quantize_b/, "$highbd_quantize_b_args";
  specialize qw/aom_highbd_quantize_b sse2 avx2/;

  add_proto qw/void aom_highbd_quantize_b_32x32/, "$highbd_quantize_b_args";
  specialize qw/aom_highbd_quantize_b_32x32 sse2/;

  # No SIMD variant is registered for the 64x64 quantizer here.
  add_proto qw/void aom_highbd_quantize_b_64x64/, "$highbd_quantize_b_args";

}  # CONFIG_AV1_ENCODER
Yaowu Xu's avatar
Yaowu Xu committed
733 734 735 736
if (aom_config("CONFIG_AV1") eq "yes") {
  #
  # Alpha blending with mask
  #
  # The 32-bit intermediate variant is only declared with CONFIG_CONVOLVE_ROUND
  # and has no SIMD variant registered here.
  if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
    add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
  }

  add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
  specialize "aom_blend_a64_mask", qw/sse4_1/;

  add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
  specialize "aom_blend_a64_hmask", qw/sse4_1/;

  add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
  specialize "aom_blend_a64_vmask", qw/sse4_1/;

  # High bit-depth counterparts: same parameters plus a trailing bit depth.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd";
    specialize "aom_highbd_blend_a64_mask", qw/sse4_1/;

    add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
    specialize "aom_highbd_blend_a64_hmask", qw/sse4_1/;

    add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
    specialize "aom_highbd_blend_a64_vmask", qw/sse4_1/;
  }
}  # CONFIG_AV1

757
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
758
  #
James Zern's avatar
James Zern committed
759
  # Block subtraction
Yaowu Xu's avatar
Yaowu Xu committed
760
  #
James Zern's avatar
James Zern committed
761 762 763 764 765 766 767 768 769 770 771 772
  # Residual computation: declared with neon, msa and sse2 variants.
  add_proto qw/void aom_subtract_block/,
    "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
  specialize "aom_subtract_block", qw/neon msa sse2/;

  # NOTE(review): this guard repeats the CONFIG_AV1_ENCODER test of the
  # enclosing block, so it looks redundant; kept as-is.
  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    #
    # Sum of Squares
    #
    add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
    specialize "aom_sum_squares_2d_i16", qw/sse2/;

    add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
    specialize "aom_sum_squares_i16", qw/sse2/;
  }

James Zern's avatar
James Zern committed
775

Yaowu Xu's avatar
Yaowu Xu committed
776
  #
James Zern's avatar
James Zern committed
777
  # Avg
Yaowu Xu's avatar
Yaowu Xu committed
778
  #
James Zern's avatar
James Zern committed
779 780 781 782 783 784 785 786 787
  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    #
    # Avg
    #
    # NOTE(review): no add_proto for aom_avg_8x8 is visible in this section;
    # confirm the prototype is declared elsewhere or drop this line.
    specialize qw/aom_avg_8x8 sse2 neon msa/;
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
      specialize "aom_highbd_subtract_block", qw/sse2/;
    }

    #
    # Minmax
    #
    add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
    specialize "aom_minmax_8x8", qw/sse2 neon/;
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      # Declared without any SIMD variant.
      add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
    }

    add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
    specialize qw/aom_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";

    add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
    specialize "aom_hadamard_16x16", qw/sse2 neon/;

    add_proto qw/int aom_satd/, "const int16_t *coeff, int length";
    specialize "aom_satd", qw/sse2 neon/;

    add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
    specialize "aom_int_pro_row", qw/sse2 neon/;

    add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
    specialize "aom_int_pro_col", qw/sse2 neon/;

    add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
    specialize "aom_vector_var", qw/neon sse2/;
  }  # CONFIG_AV1_ENCODER
Yaowu Xu's avatar
Yaowu Xu committed
816

James Zern's avatar
James Zern committed
817 818 819
  #
  # Single block SAD / Single block Avg SAD
  #
Yaowu Xu's avatar
Yaowu Xu committed
820 821
  # Declare the plain and averaging SAD prototype for every block size, plus
  # the joint-compound averaging variant when CONFIG_JNT_COMP is enabled.
  foreach my $dims (@block_sizes) {
    ($w, $h) = @$dims;
    add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    next unless (aom_config("CONFIG_JNT_COMP") eq "yes");
    add_proto qw/unsigned int/, "aom_jnt_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const JNT_COMP_PARAMS *jcp_param";
  }

James Zern's avatar
James Zern committed
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
  # SIMD variants registered for each plain SAD kernel: the first column is
  # the block size, the remaining columns are the variants for that size.
  foreach my $entry (
    [qw/128x128  avx2          sse2/],
    [qw/128x64   avx2          sse2/],
    [qw/64x128   avx2          sse2/],
    [qw/64x64    avx2 neon msa sse2/],
    [qw/64x32    avx2      msa sse2/],
    [qw/32x64    avx2      msa sse2/],
    [qw/32x32    avx2 neon msa sse2/],
    [qw/32x16    avx2      msa sse2/],
    [qw/16x32              msa sse2/],
    [qw/16x16         neon msa sse2/],
    [qw/16x8          neon msa sse2/],
    [qw/8x16          neon msa sse2/],
    [qw/8x8           neon msa sse2/],
    [qw/8x4                msa sse2/],
    [qw/4x8                msa sse2/],
    [qw/4x4           neon msa sse2/],
  ) {
    my ($size, @variants) = @$entry;
    specialize "aom_sad${size}", @variants;
  }

  # SIMD variants for the averaging SAD kernels, grouped by variant set.
  # 128-wide/tall sizes: avx2 and sse2.
  foreach my $size (qw/128x128 128x64 64x128/) {
    specialize "aom_sad${size}_avg", qw/avx2 sse2/;
  }
  # 64x64 down to 32x16: avx2, msa and sse2.
  foreach my $size (qw/64x64 64x32 32x64 32x32 32x16/) {
    specialize "aom_sad${size}_avg", qw/avx2 msa sse2/;
  }
  # 16x32 and smaller: msa and sse2.
  foreach my $size (qw/16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
    specialize "aom_sad${size}_avg", qw/msa sse2/;
  }
Yaowu Xu's avatar
Yaowu Xu committed
862

863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906
  if (aom_config("CONFIG_JNT_COMP") eq "yes") {
    # Joint-compound averaging SAD: ssse3 for every listed block size.
    foreach my $size (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16
                         16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
      specialize "aom_jnt_sad${size}_avg", qw/ssse3/;
    }

    # Extra sizes listed under CONFIG_EXT_PARTITION_TYPES.
    if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
      foreach my $size (qw/4x16 16x4 8x32 32x8 16x64 64x16 32x128 128x32/) {
        specialize "aom_jnt_sad${size}_avg", qw/ssse3/;
      }
    }

    # Fixed-width SAD helpers that take width and height as arguments; each
    # is declared with an sse2 variant.
    foreach my $width (qw/4 8 16 32 64 128/) {
      add_proto qw/unsigned int/, "aom_sad${width}xh", "const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height";
      specialize "aom_sad${width}xh", qw/sse2/;
    }
  }

907
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    # Declare high bit-depth SAD / averaging SAD for every block size.
    foreach my $dims (@block_sizes) {
      ($w, $h) = @$dims;
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
      # sse2 is not registered for sizes with a 128 dimension or a width of 4.
      next if ($w == 128 || $h == 128 || $w == 4);
      specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
      specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
    }

    # avx2 is registered for this fixed list of sizes, for both the plain
    # and the averaging kernel.
    foreach my $size (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16
                         16x32 16x16 16x8/) {
      specialize "aom_highbd_sad${size}", qw/avx2/;
      specialize "aom_highbd_sad${size}_avg", qw/avx2/;
    }
  }

James Zern's avatar
James Zern committed
942 943 944
  #
  # Masked SAD
  #
945 946 947 948 949 950 951
  # One masked SAD prototype per block size, each with an ssse3 variant.
  foreach my $dims (@block_sizes) {
    ($w, $h) = @$dims;
    add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}",
      "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
    specialize "aom_masked_sad${w}x${h}", "ssse3";
  }

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    # High bit-depth masked SAD: one prototype per block size, each with an
    # ssse3 variant.
    foreach my $dims (@block_sizes) {
      ($w, $h) = @$dims;
      add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3/;
    }
  }
Yaowu Xu's avatar
Yaowu Xu committed
958

James Zern's avatar
James Zern committed
959 960 961
  #
  # OBMC SAD
  #
962 963 964 965 966 967 968 969 970
  # One OBMC SAD prototype per block size. The sse4_1 variant is registered
  # for every size except 128x32 and 32x128.
  foreach my $dims (@block_sizes) {
    ($w, $h) = @$dims;
    add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
    next if (($w == 128 && $h == 32) || ($w == 32 && $h == 128));
    specialize "aom_obmc_sad${w}x${h}", qw/sse4_1/;
  }

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
971 972
    foreach (@block_sizes) {
      ($w, $h) = @$_;
973
      add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
974
      if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
975
        specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
James Zern's avatar
James Zern committed
976
      }
Yaowu Xu's avatar
Yaowu Xu committed
977 978 979
    }
  }

James Zern's avatar
James Zern committed
980 981 982 983 984 985 986 987 988 989 990 991
  #
  # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
  #
  # Blocks of 3
  # Square sizes: one prototype per entry of @block_widths. Specializations
  # are then listed explicitly for the 64, 32, 16, 8 and 4 wide squares.
  foreach my $s (@block_widths) {
    add_proto qw/void/, "aom_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }
  specialize qw/aom_sad64x64x3            msa/;
  specialize qw/aom_sad32x32x3            msa/;
  specialize qw/aom_sad16x16x3 sse3 ssse3 msa/;
  specialize qw/aom_sad8x8x3   sse3       msa/;
  specialize qw/aom_sad4x4x3   sse3       msa/;

  # Rectangular 16x8 and 8x16 sizes are declared individually.
  add_proto qw/void/, "aom_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad16x8x3 sse3 ssse3 msa/;
  add_proto qw/void/, "aom_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x16x3 sse3 msa/;

  # Blocks of 8
  # Square sizes: one prototype per entry of @block_widths. Specializations
  # are then listed explicitly for the 64, 32, 16, 8 and 4 wide squares.
  foreach my $s (@block_widths) {
    add_proto qw/void/, "aom_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }
  specialize qw/aom_sad64x64x8        msa/;
  specialize qw/aom_sad32x32x8        msa/;
  specialize qw/aom_sad16x16x8 sse4_1 msa/;
  specialize qw/aom_sad8x8x8   sse4_1 msa/;
  specialize qw/aom_sad4x4x8   sse4_1 msa/;

  # Rectangular sizes are declared individually; 8x4 and 4x8 request msa only.
  add_proto qw/void/, "aom_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad16x8x8 sse4_1 msa/;
  add_proto qw/void/, "aom_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x16x8 sse4_1 msa/;
  add_proto qw/void/, "aom_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x4x8 msa/;
  add_proto qw/void/, "aom_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad4x8x8 msa/;

  # High bit depth multi-block SAD (x3 and x8), declared only when
  # CONFIG_HIGHBITDEPTH is "yes". Only prototypes are added in this section;
  # no specialize calls are made for these functions here.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    foreach my $s (@block_widths) {
      # Blocks of 3
      add_proto qw/void/, "aom_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
      # Blocks of 8
      add_proto qw/void/, "aom_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    }
    # Blocks of 3
    add_proto qw/void/, "aom_highbd_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    # Blocks of 8
    add_proto qw/void/, "aom_highbd_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }

  #
  # Multi-block SAD, comparing a reference to N independent blocks
  #
  # One x4d prototype per entry of @block_sizes; ref_ptr is an array of
  # reference pointers. Specializations are listed per size below.
  foreach my $size (@block_sizes) {
    my ($w, $h) = @$size;
    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
  }

  specialize qw/aom_sad128x128x4d avx2          sse2/;
  specialize qw/aom_sad128x64x4d  avx2          sse2/;
  specialize qw/aom_sad64x128x4d  avx2          sse2/;
  specialize qw/aom_sad64x64x4d   avx2 neon msa sse2/;
  specialize qw/aom_sad64x32x4d   avx2      msa sse2/;
  specialize qw/aom_sad32x64x4d   avx2      msa sse2/;
  specialize qw/aom_sad32x32x4d   avx2 neon msa sse2/;
  specialize qw/aom_sad32x16x4d             msa sse2/;
  specialize qw/aom_sad16x32x4d             msa sse2/;
  specialize qw/aom_sad16x16x4d        neon msa sse2/;
  specialize qw/aom_sad16x8x4d              msa sse2/;
  specialize qw/aom_sad8x16x4d              msa sse2/;
  specialize qw/aom_sad8x8x4d               msa sse2/;
  specialize qw/aom_sad8x4x4d               msa sse2/;
  specialize qw/aom_sad4x8x4d               msa sse2/;
  specialize qw/aom_sad4x4x4d               msa sse2/;

  # High bit depth x4d SAD, declared only when CONFIG_HIGHBITDEPTH is "yes".
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    #
    # Multi-block SAD, comparing a reference to N independent blocks
    #
    foreach my $size (@block_sizes) {
      my ($w, $h) = @$size;
      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
      # sse2 is requested only when neither dimension is 128.
      if ($w != 128 && $h != 128) {
        specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
      }
    }
    # avx2 is requested explicitly for the sizes listed here.
    specialize qw/aom_highbd_sad128x128x4d avx2/;
    specialize qw/aom_highbd_sad128x64x4d  avx2/;
    specialize qw/aom_highbd_sad64x128x4d  avx2/;
    specialize qw/aom_highbd_sad64x64x4d   avx2/;
    specialize qw/aom_highbd_sad64x32x4d   avx2/;
    specialize qw/aom_highbd_sad32x64x4d   avx2/;
    specialize qw/aom_highbd_sad32x32x4d   avx2/;
    specialize qw/aom_highbd_sad32x16x4d   avx2/;
    specialize qw/aom_highbd_sad16x32x4d   avx2/;
    specialize qw/aom_highbd_sad16x16x4d   avx2/;
    specialize qw/aom_highbd_sad16x8x4d    avx2/;
  }

  #
  # Structured Similarity (SSIM)
  #
  if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
    add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
1089

James Zern's avatar
James Zern committed
1090 1091
    add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64";
Yaowu Xu's avatar
Yaowu Xu committed
1092

James Zern's avatar
James Zern committed
1093