aom_dsp_rtcd_defs.pl 106 KB
Newer Older
Yaowu Xu's avatar
Yaowu Xu committed
1 2 3 4 5 6 7 8
# Prints the C preamble for the DSP section: a banner comment followed by the
# #include lines listed in the heredoc below. Takes no arguments.
sub aom_dsp_forward_decls() {
# The heredoc body is emitted verbatim, so it must contain only valid C.
print <<EOF;
/*
 * DSP
 */

#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/enums.h"

EOF
}
# Hand the name of the preamble-printing sub above to forward_decls
# (forward_decls itself is defined outside this file section).
forward_decls qw/aom_dsp_forward_decls/;

# Optimizations which depend on multiple features: $avx2_ssse3 is 'avx2' only
# when both HAVE_AVX2 and HAVE_SSSE3 are "yes", and the empty string otherwise.
$avx2_ssse3 =
    (aom_config("HAVE_AVX2") eq "yes" && aom_config("HAVE_SSSE3") eq "yes")
    ? 'avx2'
    : '';

# Functions that are 64 bit only: each *_x86_64 variable holds its
# instruction-set name when $opts{arch} is "x86_64", else the empty string.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
    ($opts{arch} eq "x86_64")
    ? ('mmx', 'sse2', 'ssse3', 'avx', 'avx2')
    : ('', '', '', '', '');

# Candidate block dimensions; 128 is appended only when CONFIG_EXT_PARTITION
# is enabled.
@block_widths = (4, 8, 16, 32, 64);
push @block_widths, 128 if (aom_config("CONFIG_EXT_PARTITION") eq "yes");

# Build the list of [width, height] pairs from @block_widths, keeping only
# shapes whose sides differ by at most a factor of two (1:1, 1:2 and 2:1).
@block_sizes = ();
for my $width (@block_widths) {
  for my $height (@block_widths) {
    # Skip anything more elongated than 2:1 in either direction.
    next if ($width > 2 * $height || $height > 2 * $width);
    push @block_sizes, [$width, $height];
  }
}
43
# Extra 1:4 and 4:1 block shapes, appended to @block_sizes only when
# CONFIG_EXT_PARTITION_TYPES is enabled. The 32x128 / 128x32 pair additionally
# requires CONFIG_EXT_PARTITION.
if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
  push @block_sizes, [4, 16];
  push @block_sizes, [16, 4];
  push @block_sizes, [8, 32];
  push @block_sizes, [32, 8];
  push @block_sizes, [16, 64];
  push @block_sizes, [64, 16];
  if (aom_config("CONFIG_EXT_PARTITION") eq "yes") {
    push @block_sizes, [32, 128];
    push @block_sizes, [128, 32];
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
55

56 57 58 59
# Transform dimensions; '64' is appended only when CONFIG_TX64X64 is enabled.
@tx_dims = (2, 4, 8, 16, 32);
push @tx_dims, '64' if (aom_config("CONFIG_TX64X64") eq "yes");
60

61 62 63 64 65 66 67 68
# Build the list of [width, height] transform sizes: every square size from
# @tx_dims, followed (per width) by the 1:2 / 2:1 rectangles whose sides are
# both at least 4.
@tx_sizes = ();
for my $width (@tx_dims) {
  push @tx_sizes, [$width, $width];
  # Rectangular sizes need both sides >= 4, so a narrower width adds none.
  next if ($width < 4);
  for my $height (@tx_dims) {
    next if ($height < 4);
    if ($width == 2 * $height || $height == 2 * $width) {
      push @tx_sizes, [$width, $height];
    }
  }
}

Urvang Joshi's avatar
Urvang Joshi committed
69 70 71
# Names of the intra predictors for which prototypes are generated;
# smooth_v / smooth_h are added only when CONFIG_SMOOTH_HV is enabled.
@pred_names = qw/dc dc_top dc_left dc_128 v h d207e d63e d45e d117 d135 d153 paeth smooth/;
if (aom_config("CONFIG_SMOOTH_HV") eq "yes") {
  push @pred_names, qw/smooth_v smooth_h/;
}
73

74 75 76
#
# Intra prediction
#
77

78 79
# Declare one aom_<pred>_predictor_<W>x<H> prototype for every combination of
# transform size and predictor name, plus an aom_highbd_ counterpart (16-bit
# samples and an extra bit-depth argument) when CONFIG_HIGHBITDEPTH is enabled.
foreach (@tx_sizes) {
  # Each entry of @tx_sizes is a [width, height] pair.
  ($w, $h) = @$_;
  foreach $pred_name (@pred_names) {
    add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
                "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    }
  }
}
89

90 91 92 93 94 95 96 97
# Instruction-set variants available for the dc_top intra predictor, per size.
specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
specialize qw/aom_dc_top_predictor_4x8 sse2/;
specialize qw/aom_dc_top_predictor_8x4 sse2/;
specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_top_predictor_8x16 sse2/;
specialize qw/aom_dc_top_predictor_16x8 sse2/;
specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_top_predictor_16x32 sse2/;
specialize qw/aom_dc_top_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_top_predictor_32x32 msa neon sse2 avx2/;
100 101 102 103 104 105 106 107
# Instruction-set variants available for the dc_left intra predictor, per size.
specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
specialize qw/aom_dc_left_predictor_4x8 sse2/;
specialize qw/aom_dc_left_predictor_8x4 sse2/;
specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_left_predictor_8x16 sse2/;
specialize qw/aom_dc_left_predictor_16x8 sse2/;
specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_left_predictor_16x32 sse2/;
specialize qw/aom_dc_left_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_left_predictor_32x32 msa neon sse2 avx2/;
110 111 112 113 114 115 116 117
# Instruction-set variants available for the dc_128 intra predictor, per size.
specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;
specialize qw/aom_dc_128_predictor_4x8 sse2/;
specialize qw/aom_dc_128_predictor_8x4 sse2/;
specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_128_predictor_8x16 sse2/;
specialize qw/aom_dc_128_predictor_16x8 sse2/;
specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_128_predictor_16x32 sse2/;
specialize qw/aom_dc_128_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_128_predictor_32x32 msa neon sse2 avx2/;
120 121 122 123 124 125 126 127
# Instruction-set variants available for the vertical intra predictor, per size.
specialize qw/aom_v_predictor_4x4 neon msa sse2/;
specialize qw/aom_v_predictor_4x8 sse2/;
specialize qw/aom_v_predictor_8x4 sse2/;
specialize qw/aom_v_predictor_8x8 neon msa sse2/;
specialize qw/aom_v_predictor_8x16 sse2/;
specialize qw/aom_v_predictor_16x8 sse2/;
specialize qw/aom_v_predictor_16x16 neon msa sse2/;
specialize qw/aom_v_predictor_16x32 sse2/;
specialize qw/aom_v_predictor_32x16 sse2 avx2/;
specialize qw/aom_v_predictor_32x32 neon msa sse2 avx2/;
130 131 132 133 134 135 136 137 138
# Instruction-set variants available for the horizontal intra predictor, per
# size.
specialize qw/aom_h_predictor_4x8 sse2/;
specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_8x4 sse2/;
specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_8x16 sse2/;
specialize qw/aom_h_predictor_16x8 sse2/;
specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
specialize qw/aom_h_predictor_16x32 sse2/;
specialize qw/aom_h_predictor_32x16 sse2/;
specialize qw/aom_h_predictor_32x32 neon msa sse2 avx2/;
140 141 142 143 144
# Instruction-set variants available for the paeth intra predictor, per size.
# Each size is registered exactly once; the 16- and 32-wide sizes carry avx2
# in addition to ssse3.
# NOTE(review): a second ssse3-only registration of the 16x8..32x32 sizes used
# to follow this list. It was dropped as redundant, on the assumption that
# specialize only adds variants — confirm against the definition of specialize.
specialize qw/aom_paeth_predictor_4x4 ssse3/;
specialize qw/aom_paeth_predictor_4x8 ssse3/;
specialize qw/aom_paeth_predictor_8x4 ssse3/;
specialize qw/aom_paeth_predictor_8x8 ssse3/;
specialize qw/aom_paeth_predictor_8x16 ssse3/;
specialize qw/aom_paeth_predictor_16x8 ssse3 avx2/;
specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
specialize qw/aom_paeth_predictor_32x16 ssse3 avx2/;
specialize qw/aom_paeth_predictor_32x32 ssse3 avx2/;
# The smooth intra predictor has an ssse3 variant for every size listed here;
# sizes are registered in the same order as before.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/) {
  specialize("aom_smooth_predictor_${size}", "ssse3");
}
165

166
# Instruction-set variants available for the d63e / d135 / d153 directional
# predictors and the dc predictor, per size.
specialize qw/aom_d63e_predictor_4x4 ssse3/;
specialize qw/aom_d135_predictor_4x4 neon/;
specialize qw/aom_d153_predictor_4x4 ssse3/;
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_predictor_4x8 sse2/;
specialize qw/aom_d153_predictor_8x8 ssse3/;
specialize qw/aom_dc_predictor_8x4 sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_8x16 sse2/;
specialize qw/aom_d153_predictor_16x16 ssse3/;
specialize qw/aom_dc_predictor_16x8 sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_16x32 sse2/;
specialize qw/aom_d153_predictor_32x32 ssse3/;

specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
183

184
# Instruction-set variants available for the high-bitdepth intra predictors.
# Everything here is registered only when CONFIG_HIGHBITDEPTH is enabled.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  # Vertical predictor.
  specialize qw/aom_highbd_v_predictor_4x4 sse2/;
  specialize qw/aom_highbd_v_predictor_4x8 sse2/;
  specialize qw/aom_highbd_v_predictor_8x4 sse2/;
  specialize qw/aom_highbd_v_predictor_8x8 sse2/;
  specialize qw/aom_highbd_v_predictor_8x16 sse2/;
  specialize qw/aom_highbd_v_predictor_16x8 sse2/;
  specialize qw/aom_highbd_v_predictor_16x16 sse2/;
  specialize qw/aom_highbd_v_predictor_16x32 sse2/;
  specialize qw/aom_highbd_v_predictor_32x16 sse2/;
  specialize qw/aom_highbd_v_predictor_32x32 sse2/;
  # DC predictor.
  specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
  specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
  specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
  specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
  specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
  specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
  specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
  specialize qw/aom_highbd_dc_predictor_16x32 sse2/;
  specialize qw/aom_highbd_dc_predictor_32x16 sse2/;
  specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
  # Horizontal predictor.
  specialize qw/aom_highbd_h_predictor_4x4 sse2/;
  specialize qw/aom_highbd_h_predictor_4x8 sse2/;
  specialize qw/aom_highbd_h_predictor_8x4 sse2/;
  specialize qw/aom_highbd_h_predictor_8x8 sse2/;
  specialize qw/aom_highbd_h_predictor_8x16 sse2/;
  specialize qw/aom_highbd_h_predictor_16x8 sse2/;
  specialize qw/aom_highbd_h_predictor_16x16 sse2/;
  specialize qw/aom_highbd_h_predictor_16x32 sse2/;
  specialize qw/aom_highbd_h_predictor_32x16 sse2/;
  specialize qw/aom_highbd_h_predictor_32x32 sse2/;
  # dc_left / dc_top / dc_128 predictors, grouped by size.
  specialize qw/aom_highbd_dc_left_predictor_4x4 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_4x4 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_4x4 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_4x8 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_4x8 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_4x8 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_8x4 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_8x4 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_8x4 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_8x8 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_8x8 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_8x8 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_8x16 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_8x16 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_8x16 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_16x8 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_16x8 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_16x8 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_16x16 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_16x16 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_16x16 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_16x32 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_16x32 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_16x32 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_32x16 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_32x16 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_32x16 sse2/;
  specialize qw/aom_highbd_dc_left_predictor_32x32 sse2/;
  specialize qw/aom_highbd_dc_top_predictor_32x32 sse2/;
  specialize qw/aom_highbd_dc_128_predictor_32x32 sse2/;

  # d117 / d135 / d153 directional predictors (square sizes only).
  specialize qw/aom_highbd_d117_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d117_predictor_8x8 ssse3/;
  specialize qw/aom_highbd_d117_predictor_16x16 ssse3/;
  specialize qw/aom_highbd_d117_predictor_32x32 ssse3/;
  specialize qw/aom_highbd_d135_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d135_predictor_8x8 ssse3/;
  specialize qw/aom_highbd_d135_predictor_16x16 ssse3/;
  specialize qw/aom_highbd_d135_predictor_32x32 ssse3/;
  specialize qw/aom_highbd_d153_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d153_predictor_8x8 ssse3/;
  specialize qw/aom_highbd_d153_predictor_16x16 ssse3/;
  specialize qw/aom_highbd_d153_predictor_32x32 ssse3/;

  # d45e directional predictor.
  specialize qw/aom_highbd_d45e_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d45e_predictor_4x8 sse2/;
  specialize qw/aom_highbd_d45e_predictor_8x4 sse2/;
  specialize qw/aom_highbd_d45e_predictor_8x8 sse2/;
  specialize qw/aom_highbd_d45e_predictor_8x16 sse2/;
  specialize qw/aom_highbd_d45e_predictor_16x8 avx2/;
  specialize qw/aom_highbd_d45e_predictor_16x16 avx2/;
  specialize qw/aom_highbd_d45e_predictor_16x32 avx2/;
  specialize qw/aom_highbd_d45e_predictor_32x16 avx2/;
  specialize qw/aom_highbd_d45e_predictor_32x32 avx2/;

  # d207e directional predictor.
  specialize qw/aom_highbd_d207e_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d207e_predictor_4x8 sse2/;
  specialize qw/aom_highbd_d207e_predictor_8x4 sse2/;
  specialize qw/aom_highbd_d207e_predictor_8x8 sse2/;
  specialize qw/aom_highbd_d207e_predictor_8x16 sse2/;
  specialize qw/aom_highbd_d207e_predictor_16x8 sse2/;
  specialize qw/aom_highbd_d207e_predictor_16x16 sse2/;
  specialize qw/aom_highbd_d207e_predictor_16x32 sse2/;
  specialize qw/aom_highbd_d207e_predictor_32x16 avx2/;
  specialize qw/aom_highbd_d207e_predictor_32x32 avx2/;

  # d63e directional predictor.
  specialize qw/aom_highbd_d63e_predictor_4x4 sse2/;
  specialize qw/aom_highbd_d63e_predictor_4x8 sse2/;
  specialize qw/aom_highbd_d63e_predictor_8x4 sse2/;
  specialize qw/aom_highbd_d63e_predictor_8x8 sse2/;
  specialize qw/aom_highbd_d63e_predictor_8x16 sse2/;
  specialize qw/aom_highbd_d63e_predictor_16x8 avx2/;
  specialize qw/aom_highbd_d63e_predictor_16x16 avx2/;
  specialize qw/aom_highbd_d63e_predictor_16x32 avx2/;
  specialize qw/aom_highbd_d63e_predictor_32x16 avx2/;
  specialize qw/aom_highbd_d63e_predictor_32x32 avx2/;
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
292 293 294 295

#
# Sub Pixel Filters
#
Fergus Simpson's avatar
Fergus Simpson committed
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
# 8-bit convolve / scale prototypes. Every function in a group shares one
# argument list, so the list is written once and the functions are declared in
# a loop, in the same order as before. The argument string is interpolated
# ("...") so that each add_proto call receives its own copy.
{
  # Plain variants: separate x and y filters with a step for each.
  my $convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  foreach my $fn (qw/
      aom_convolve_copy
      aom_convolve_avg
      aom_convolve8
      aom_convolve8_horiz
      aom_convolve8_vert
      aom_convolve8_avg
      aom_convolve8_avg_horiz
      aom_convolve8_avg_vert
      aom_scaled_2d
      aom_scaled_horiz
      aom_scaled_vert
      aom_scaled_avg_2d
      aom_scaled_avg_horiz
      aom_scaled_avg_vert
  /) {
    add_proto("void", $fn, "$convolve_args");
  }

  # *_scale variants: additionally take subpel_x / subpel_y.
  my $scale_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
  foreach my $fn (qw/
      aom_convolve8_horiz_scale
      aom_convolve8_vert_scale
      aom_convolve8_avg_horiz_scale
      aom_convolve8_avg_vert_scale
      aom_convolve8_scale
      aom_convolve8_avg_scale
  /) {
    add_proto("void", $fn, "$scale_args");
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
317 318 319 320 321 322 323 324 325 326 327

# x86 variants of the 8-bit convolve functions. The non-averaging convolve8
# family also receives $avx2_ssse3, which is either 'avx2' or the empty string.
specialize(qw/aom_convolve_copy sse2/);
specialize(qw/aom_convolve_avg sse2/);
foreach my $fn (qw/aom_convolve8 aom_convolve8_horiz aom_convolve8_vert/) {
  specialize($fn, qw/sse2 ssse3/, "$avx2_ssse3");
}
foreach my $fn (qw/aom_convolve8_avg aom_convolve8_avg_horiz aom_convolve8_avg_vert/) {
  specialize($fn, qw/sse2 ssse3/);
}
specialize(qw/aom_scaled_2d ssse3/);

328 329 330 331
# "add_src" convolve prototypes and their x86 variants, declared only when
# CONFIG_LOOP_RESTORATION is enabled. In the _hip group the horizontal pass
# writes a uint16_t destination and the vertical pass reads a uint16_t source.
if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
  add_proto qw/void aom_convolve8_add_src/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_vert/,  "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_hip/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_horiz_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_vert_hip/,  "const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

  specialize qw/aom_convolve8_add_src ssse3/;
  specialize qw/aom_convolve8_add_src_horiz ssse3/;
  specialize qw/aom_convolve8_add_src_vert ssse3/;
  specialize qw/aom_convolve8_add_src_hip sse2/;
}  # CONFIG_LOOP_RESTORATION

Yaowu Xu's avatar
Yaowu Xu committed
342 343 344 345 346 347 348 349 350 351 352 353
# TODO(any): These need to be extended to up to 128x128 block sizes
# Until then the neon / dspr2 / msa convolve variants are registered for every
# configuration except CONFIG_AV1 together with CONFIG_EXT_PARTITION.
unless (aom_config("CONFIG_AV1") eq "yes" && aom_config("CONFIG_EXT_PARTITION") eq "yes") {
  foreach my $fn (qw/
      aom_convolve_copy
      aom_convolve_avg
      aom_convolve8
      aom_convolve8_horiz
      aom_convolve8_vert
      aom_convolve8_avg
      aom_convolve8_avg_horiz
      aom_convolve8_avg_vert
  /) {
    specialize($fn, qw/neon dspr2 msa/);
  }
}

354
# High-bitdepth convolve prototypes and their x86 variants, declared only when
# CONFIG_HIGHBITDEPTH is enabled. All functions except the two *_hip passes
# share one argument list, which is therefore written once; it is interpolated
# ("...") at each use so every add_proto call receives its own copy.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  my $highbd_convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";

  add_proto qw/void aom_highbd_convolve_copy/, "$highbd_convolve_args";
  specialize qw/aom_highbd_convolve_copy sse2 avx2/;

  add_proto qw/void aom_highbd_convolve_avg/, "$highbd_convolve_args";
  specialize qw/aom_highbd_convolve_avg sse2 avx2/;

  # $sse2_x86_64 is 'sse2' on x86_64 and the empty string on other targets.
  add_proto qw/void aom_highbd_convolve8/, "$highbd_convolve_args";
  specialize qw/aom_highbd_convolve8 avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_horiz/, "$highbd_convolve_args";
  specialize qw/aom_highbd_convolve8_horiz avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_vert/, "$highbd_convolve_args";
  specialize qw/aom_highbd_convolve8_vert avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg/, "$highbd_convolve_args";
  specialize qw/aom_highbd_convolve8_avg avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg_horiz/, "$highbd_convolve_args";
  specialize qw/aom_highbd_convolve8_avg_horiz avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg_vert/, "$highbd_convolve_args";
  specialize qw/aom_highbd_convolve8_avg_vert avx2/, "$sse2_x86_64";

  if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
    add_proto qw/void aom_highbd_convolve8_add_src/, "$highbd_convolve_args";
    add_proto qw/void aom_highbd_convolve8_add_src_horiz/, "$highbd_convolve_args";
    add_proto qw/void aom_highbd_convolve8_add_src_vert/, "$highbd_convolve_args";
    add_proto qw/void aom_highbd_convolve8_add_src_hip/, "$highbd_convolve_args";
    # The _hip horizontal pass writes uint16_t; the vertical pass reads it.
    add_proto qw/void aom_highbd_convolve8_add_src_horiz_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
    add_proto qw/void aom_highbd_convolve8_add_src_vert_hip/, "const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";

    specialize qw/aom_highbd_convolve8_add_src/, "$sse2_x86_64";
    specialize qw/aom_highbd_convolve8_add_src_hip ssse3/;
    # The _horiz/_vert functions are currently unused, so we don't bother
    # specialising them.
  }  # CONFIG_LOOP_RESTORATION
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
393 394 395 396 397

#
# Loopfilter
#
# Vertical loop-filter prototypes and variants. With
# CONFIG_PARALLEL_DEBLOCKING enabled only sse2 is registered for the
# single-edge filters and nothing is registered for the *_dual filters.
# NOTE(review): each $<fn>_neon_asm assignment presumably tells the generator
# which symbol implements the neon_asm variant — confirm against its consumer.
add_proto qw/void aom_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_16 sse2/;
} else {
  specialize qw/aom_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
  $aom_lpf_vertical_16_neon_asm = 'aom_lpf_vertical_16_neon';
}

add_proto qw/void aom_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
  $aom_lpf_vertical_16_dual_neon_asm = 'aom_lpf_vertical_16_dual_neon';
}

add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_8 sse2/;
} else {
  specialize qw/aom_lpf_vertical_8 sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
  $aom_lpf_vertical_8_dual_neon_asm = 'aom_lpf_vertical_8_dual_neon';
}

add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_4 sse2/;
} else {
  specialize qw/aom_lpf_vertical_4 sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_4_dual sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
435 436

# Horizontal loop-filter prototypes and variants. With
# CONFIG_PARALLEL_DEBLOCKING enabled only sse2 is registered for the
# single-edge filters and nothing is registered for the *_dual filters.
# NOTE(review): each $<fn>_neon_asm assignment presumably tells the generator
# which symbol implements the neon_asm variant — confirm against its consumer.
add_proto qw/void aom_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_edge_8 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
  $aom_lpf_horizontal_edge_8_neon_asm = 'aom_lpf_horizontal_edge_8_neon';
}

add_proto qw/void aom_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_edge_16 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
  $aom_lpf_horizontal_edge_16_neon_asm = 'aom_lpf_horizontal_edge_16_neon';
}

add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_8 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_8 sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
  $aom_lpf_horizontal_8_dual_neon_asm = 'aom_lpf_horizontal_8_dual_neon';
}

add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_4 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_4 sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
476

477
# High bit-depth (uint16_t samples, explicit bit depth `bd`) aom_highbd_lpf_*
# prototypes. Only declared when CONFIG_HIGHBITDEPTH is "yes". Every function
# lists sse2; the *_dual and horizontal_edge_16 variants additionally list avx2.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void aom_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_16 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_16_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_8 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_4 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_edge_8 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_edge_16 sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_8 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_4 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
}  # CONFIG_HIGHBITDEPTH

#
# Encoder functions.
#

#
# Forward transform
#
# Declared when either the AV1 encoder or PVQ is enabled. Both branches below
# declare the same aom_fdct* prototypes; they differ only in which optimized
# variants are listed (the high bit-depth branch omits neon and msa) and in
# the extra aom_highbd_fdct* prototypes.
if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq "yes")) {
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4 sse2/;

    add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4_1 sse2/;

    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    # $ssse3_x86_64 is 'ssse3' on x86_64 builds and empty otherwise.
    specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";

    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16 sse2/;

    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32 sse2 avx2/;

    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_rd sse2 avx2/;

    # High bit depth
    add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct4x4 sse2/;

    add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct8x8 sse2/;

    add_proto qw/void aom_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct16x16 sse2/;

    add_proto qw/void aom_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32 sse2/;

    add_proto qw/void aom_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32_rd sse2/;
  } else {
    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4 sse2 msa/;

    add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4_1 sse2/;

    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";

    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16 sse2 msa/;

    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32 sse2 avx2 msa/;

    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
  }  # CONFIG_HIGHBITDEPTH
}  # CONFIG_AV1_ENCODER || CONFIG_PVQ

#
# Inverse transform
#
# The first group of prototypes is declared for every CONFIG_AV1 build. When
# CONFIG_HIGHBITDEPTH is not "yes", the same functions are declared a second
# time with longer specialize lists (neon/dspr2/msa added). Statement order is
# kept exactly as before because the alias assignments follow the specialize
# calls they override.
if (aom_config("CONFIG_AV1") eq "yes") {
  add_proto qw/void aom_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

  add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_iwht4x4_16_add sse2/;

  add_proto qw/void aom_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

  add_proto qw/void aom_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

  add_proto qw/void aom_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct4x4_16_add sse2/;

  add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct4x4_1_add sse2/;

  add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_64_add sse2 ssse3/;

  add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_12_add sse2 ssse3/;

  add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_1_add sse2/;

  add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_256_add sse2 avx2/;

  add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_38_add avx2/;

  add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_10_add sse2 avx2/;

  add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_1_add sse2 avx2/;

  add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2/;

  add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2/;
  # Need to add 135 eob idct32x32 implementations.
  $aom_idct32x32_135_add_sse2=aom_idct32x32_1024_add_sse2;

  add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2/;

  add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_1_add sse2 avx2/;

  # Low bit-depth only: re-declare with the wider set of optimized variants.
  if (aom_config("CONFIG_HIGHBITDEPTH") ne "yes") {
    add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct4x4_1_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct4x4_16_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct8x8_1_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct8x8_64_add sse2 ssse3 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct8x8_12_add sse2 ssse3 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct16x16_1_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct16x16_256_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct16x16_38_add avx2/;

    add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct16x16_10_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2 neon dspr2 msa/;
    # Need to add 135 eob idct32x32 implementations.
    $aom_idct32x32_135_add_sse2=aom_idct32x32_1024_add_sse2;
    $aom_idct32x32_135_add_neon=aom_idct32x32_1024_add_neon;
    $aom_idct32x32_135_add_dspr2=aom_idct32x32_1024_add_dspr2;
    $aom_idct32x32_135_add_msa=aom_idct32x32_1024_add_msa;

    add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2 neon dspr2 msa/;
    # Need to add 34 eob idct32x32 neon implementation.
    $aom_idct32x32_34_add_neon=aom_idct32x32_1024_add_neon;

    add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct32x32_1_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_iwht4x4_1_add msa/;

    add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_iwht4x4_16_add msa sse2/;
  }  # !CONFIG_HIGHBITDEPTH
}  # CONFIG_AV1

#
# Quantization
#
# Encoder-only aom_quantize_b* prototypes. The 64x64 variant is declared
# without any specialize call. $ssse3_x86_64 / $avx_x86_64 are non-empty only
# on x86_64 builds.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";

  add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";

  add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
}  # CONFIG_AV1_ENCODER

# Encoder-only aom_highbd_quantize_b* prototypes. These are guarded by
# CONFIG_AV1_ENCODER only (not by CONFIG_HIGHBITDEPTH); the 64x64 variant has
# no specialize call.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b sse2 avx2/;

  add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b_32x32 sse2/;

  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
}  # CONFIG_AV1_ENCODER

if (aom_config("CONFIG_AV1") eq "yes") {
  #
  # Alpha blending with mask
  #
  # The int32_t (d32) variant is declared only with CONFIG_CONVOLVE_ROUND and
  # has no specialize call; every other blend function lists sse4_1.
  if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
    add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
  }
  add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
  add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
  add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
  specialize "aom_blend_a64_mask", qw/sse4_1/;
  specialize "aom_blend_a64_hmask", qw/sse4_1/;
  specialize "aom_blend_a64_vmask", qw/sse4_1/;

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd";
    add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
    add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
    specialize "aom_highbd_blend_a64_mask", qw/sse4_1/;
    specialize "aom_highbd_blend_a64_hmask", qw/sse4_1/;
    specialize "aom_highbd_blend_a64_vmask", qw/sse4_1/;
  }
}  # CONFIG_AV1

if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  #
  # Block subtraction
  #
  add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
  specialize qw/aom_subtract_block neon msa sse2/;

  #
  # Sum of Squares
  #
  # No inner CONFIG_AV1_ENCODER test is needed here: the enclosing block is
  # already conditional on exactly that setting.
  add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
  specialize qw/aom_sum_squares_2d_i16 sse2/;

  add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
  specialize qw/aom_sum_squares_i16 sse2/;

  #
  # Avg
  #
  # No inner CONFIG_AV1_ENCODER test is needed here: the enclosing block is
  # already conditional on exactly that setting.
  #
  # NOTE(review): no add_proto for aom_avg_8x8 is visible next to this
  # specialize call. Confirm the prototype is declared elsewhere, otherwise
  # this line is a leftover and can be dropped.
  specialize qw/aom_avg_8x8 sse2 neon msa/;
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
    specialize qw/aom_highbd_subtract_block sse2/;
  }

  #
  # Minmax
  #
  add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
  specialize qw/aom_minmax_8x8 sse2 neon/;
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    # Declared without a specialize call.
    add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
  }

  add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
  specialize qw/aom_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";

  add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
  specialize qw/aom_hadamard_16x16 sse2 neon/;

  add_proto qw/int aom_satd/, "const int16_t *coeff, int length";
  specialize qw/aom_satd sse2 neon/;

  add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
  specialize qw/aom_int_pro_row sse2 neon/;

  add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
  specialize qw/aom_int_pro_col sse2 neon/;

  add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
  specialize qw/aom_vector_var neon sse2/;

  #
  # Single block SAD / Single block Avg SAD
  #
  # One aom_sadWxH and one aom_sadWxH_avg prototype per [width, height] pair
  # in @block_sizes; the optimized variants are then listed per size below.
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
  }

  specialize qw/aom_sad128x128    avx2          sse2/;
  specialize qw/aom_sad128x64     avx2          sse2/;
  specialize qw/aom_sad64x128     avx2          sse2/;
  specialize qw/aom_sad64x64      avx2 neon msa sse2/;
  specialize qw/aom_sad64x32      avx2      msa sse2/;
  specialize qw/aom_sad32x64      avx2      msa sse2/;
  specialize qw/aom_sad32x32      avx2 neon msa sse2/;
  specialize qw/aom_sad32x16      avx2      msa sse2/;
  specialize qw/aom_sad16x32                msa sse2/;
  specialize qw/aom_sad16x16           neon msa sse2/;
  specialize qw/aom_sad16x8            neon msa sse2/;
  specialize qw/aom_sad8x16            neon msa sse2/;
  specialize qw/aom_sad8x8             neon msa sse2/;
  specialize qw/aom_sad8x4                  msa sse2/;
  specialize qw/aom_sad4x8                  msa sse2/;
  specialize qw/aom_sad4x4             neon msa sse2/;

  specialize qw/aom_sad128x128_avg avx2     sse2/;
  specialize qw/aom_sad128x64_avg  avx2     sse2/;
  specialize qw/aom_sad64x128_avg  avx2     sse2/;
  specialize qw/aom_sad64x64_avg   avx2 msa sse2/;
  specialize qw/aom_sad64x32_avg   avx2 msa sse2/;
  specialize qw/aom_sad32x64_avg   avx2 msa sse2/;
  specialize qw/aom_sad32x32_avg   avx2 msa sse2/;
  specialize qw/aom_sad32x16_avg   avx2 msa sse2/;
  specialize qw/aom_sad16x32_avg        msa sse2/;
  specialize qw/aom_sad16x16_avg        msa sse2/;
  specialize qw/aom_sad16x8_avg         msa sse2/;
  specialize qw/aom_sad8x16_avg         msa sse2/;
  specialize qw/aom_sad8x8_avg          msa sse2/;
  specialize qw/aom_sad8x4_avg          msa sse2/;
  specialize qw/aom_sad4x8_avg          msa sse2/;
  specialize qw/aom_sad4x4_avg          msa sse2/;

  # High bit-depth single-block SAD / Avg SAD, one pair of prototypes per
  # entry of @block_sizes.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
      # sse2 is listed only when neither dimension is 128 and the width is not 4.
      if ($w != 128 && $h != 128 && $w != 4) {
        specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
        specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
      }
    }
    specialize qw/aom_highbd_sad128x128 avx2/;
    specialize qw/aom_highbd_sad128x64  avx2/;
    specialize qw/aom_highbd_sad64x128  avx2/;
    specialize qw/aom_highbd_sad64x64   avx2/;
    specialize qw/aom_highbd_sad64x32   avx2/;
    specialize qw/aom_highbd_sad32x64   avx2/;
    specialize qw/aom_highbd_sad32x32   avx2/;
    specialize qw/aom_highbd_sad32x16   avx2/;
    specialize qw/aom_highbd_sad16x32   avx2/;
    specialize qw/aom_highbd_sad16x16   avx2/;
    specialize qw/aom_highbd_sad16x8    avx2/;

    specialize qw/aom_highbd_sad128x128_avg avx2/;
    specialize qw/aom_highbd_sad128x64_avg  avx2/;
    specialize qw/aom_highbd_sad64x128_avg  avx2/;
    specialize qw/aom_highbd_sad64x64_avg   avx2/;
    specialize qw/aom_highbd_sad64x32_avg   avx2/;
    specialize qw/aom_highbd_sad32x64_avg   avx2/;
    specialize qw/aom_highbd_sad32x32_avg   avx2/;
    specialize qw/aom_highbd_sad32x16_avg   avx2/;
    specialize qw/aom_highbd_sad16x32_avg   avx2/;
    specialize qw/aom_highbd_sad16x16_avg   avx2/;
    specialize qw/aom_highbd_sad16x8_avg    avx2/;
  }

  #
  # Masked SAD
  #
  # One prototype per [width, height] pair in @block_sizes, each listing ssse3.
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
    specialize "aom_masked_sad${w}x${h}", qw/ssse3/;
  }

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3/;
    }
  }

  #
  # OBMC SAD
  #
  # Declared only with CONFIG_MOTION_VAR. Every size in @block_sizes gets a
  # prototype; sse4_1 is listed for all of them except 128x32 and 32x128.
  if (aom_config("CONFIG_MOTION_VAR") eq "yes") {
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
      if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
        specialize "aom_obmc_sad${w}x${h}", qw/sse4_1/;
      }
    }

    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      foreach (@block_sizes) {
        ($w, $h) = @$_;
        add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
        if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
          specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
        }
      }
    }
  }

  #
  # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
  #
  # Blocks of 3
  # Square sizes come from @block_widths; 16x8 and 8x16 are declared explicitly.
  foreach $s (@block_widths) {
    add_proto qw/void/, "aom_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }
  specialize qw/aom_sad64x64x3            msa/;
  specialize qw/aom_sad32x32x3            msa/;
  specialize qw/aom_sad16x16x3 sse3 ssse3 msa/;
  specialize qw/aom_sad8x8x3   sse3       msa/;
  specialize qw/aom_sad4x4x3   sse3       msa/;

  add_proto qw/void/, "aom_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad16x8x3 sse3 ssse3 msa/;
  add_proto qw/void/, "aom_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x16x3 sse3 msa/;

  # Blocks of 8
  # Square sizes come from @block_widths; 16x8, 8x16, 8x4 and 4x8 are declared
  # explicitly.
  foreach $s (@block_widths) {
    add_proto qw/void/, "aom_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }
  specialize qw/aom_sad64x64x8        msa/;
  specialize qw/aom_sad32x32x8        msa/;
  specialize qw/aom_sad16x16x8 sse4_1 msa/;
  specialize qw/aom_sad8x8x8   sse4_1 msa/;
  specialize qw/aom_sad4x4x8   sse4_1 msa/;

  add_proto qw/void/, "aom_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad16x8x8 sse4_1 msa/;
  add_proto qw/void/, "aom_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x16x8 sse4_1 msa/;
  add_proto qw/void/, "aom_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x4x8 msa/;
  add_proto qw/void/, "aom_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad4x8x8 msa/;

  # High-bitdepth x3 / x8 SAD prototypes, declared only when
  # CONFIG_HIGHBITDEPTH is enabled. No specialize calls are made for any of
  # them in this section.
  # NOTE(review): the parameters are still typed const uint8_t * even for the
  # highbd variants -- presumably the buffers hold wider samples behind that
  # pointer type; confirm against the implementations.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    # Square sizes: one x3 and one x8 prototype per entry of @block_widths.
    foreach $s (@block_widths) {
      # Blocks of 3
      add_proto qw/void/, "aom_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
      # Blocks of 8
      add_proto qw/void/, "aom_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    }

    # Rectangular sizes are declared individually.
    # Blocks of 3
    add_proto qw/void/, "aom_highbd_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";

    # Blocks of 8
    add_proto qw/void/, "aom_highbd_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }

  #
  # Multi-block SAD, comparing a reference to N independent blocks
  #
  # One x4d prototype is declared per [width, height] pair in @block_sizes.
  # Unlike the x3/x8 prototypes, ref_ptr here is an array of reference
  # pointers ("const uint8_t * const ref_ptr[]").
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
  }

  # Optimized variants, listed by explicit name. Any other pair present in
  # @block_sizes gets no explicit specialization here.
  # NOTE(review): the 128-sized names are specialized unconditionally, while
  # @block_widths only contains 128 when CONFIG_EXT_PARTITION is enabled --
  # confirm that specialize tolerates a name that has no prototype.
  specialize qw/aom_sad128x128x4d avx2          sse2/;
  specialize qw/aom_sad128x64x4d  avx2          sse2/;
  specialize qw/aom_sad64x128x4d  avx2          sse2/;
  specialize qw/aom_sad64x64x4d   avx2 neon msa sse2/;
  specialize qw/aom_sad64x32x4d   avx2      msa sse2/;
  specialize qw/aom_sad32x64x4d   avx2      msa sse2/;
  specialize qw/aom_sad32x32x4d   avx2 neon msa sse2/;
  specialize qw/aom_sad32x16x4d             msa sse2/;
  specialize qw/aom_sad16x32x4d             msa sse2/;
  specialize qw/aom_sad16x16x4d        neon msa sse2/;
  specialize qw/aom_sad16x8x4d              msa sse2/;
  specialize qw/aom_sad8x16x4d              msa sse2/;
  specialize qw/aom_sad8x8x4d               msa sse2/;
  specialize qw/aom_sad8x4x4d               msa sse2/;
  specialize qw/aom_sad4x8x4d               msa sse2/;
  specialize qw/aom_sad4x4x4d               msa sse2/;

  # High-bitdepth x4d SAD, declared only when CONFIG_HIGHBITDEPTH is enabled.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    #
    # Multi-block SAD, comparing a reference to N independent blocks
    #
    # One prototype per [width, height] pair in @block_sizes. Every pair in
    # which neither dimension is 128 is specialized for sse2 right away; pairs
    # with a 128 dimension get no sse2 entry from this loop.
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
      if ($w != 128 && $h != 128) {
        specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
      }
    }
    # avx2 is listed by explicit name for these sizes, in addition to whatever
    # the loop above specialized.
    # NOTE(review): the 128-sized names are listed even when @block_sizes has
    # no 128 entries -- confirm that specialize tolerates that.
    specialize qw/aom_highbd_sad128x128x4d avx2/;
    specialize qw/aom_highbd_sad128x64x4d  avx2/;
    specialize qw/aom_highbd_sad64x128x4d  avx2/;
    specialize qw/aom_highbd_sad64x64x4d   avx2/;
    specialize qw/aom_highbd_sad64x32x4d   avx2/;
    specialize qw/aom_highbd_sad32x64x4d   avx2/;
    specialize qw/aom_highbd_sad32x32x4d   avx2/;
    specialize qw/aom_highbd_sad32x16x4d   avx2/;
    specialize qw/aom_highbd_sad16x32x4d   avx2/;
    specialize qw/aom_highbd_sad16x16x4d   avx2/;
    specialize qw/aom_highbd_sad16x8x4d    avx2/;
  }

  #
  # Structured Similarity (SSIM)
  #
  # The SSIM helpers are declared only when CONFIG_INTERNAL_STATS is enabled.
  # Each one takes two sample pointers with their strides (s/sp, r/rp) and
  # five uint32_t output pointers: sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr.
  if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
    # $sse2_x86_64 is 'sse2' only when the target arch is x86_64 and the empty
    # string otherwise, so the specialization is limited to 64-bit builds.
    add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";

    add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64";

    # The high-bitdepth 8x8 variant takes uint16_t sample pointers and has no
    # specialize call here.
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    }
  }
1029
}  # CONFIG_AV1_ENCODER
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  #
  # Specialty Variance
  #
  # aom_get16x16var / aom_get8x8var take source and reference pointers with
  # their strides and write through two output pointers, sse and sum; the
  # prototypes return void.
  add_proto qw/void aom_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";

  add_proto qw/void aom_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";

  # Only the 16x16 variant lists avx2.
  specialize qw/aom_get16x16var sse2 avx2 neon msa/;
  specialize qw/aom_get8x8var   sse2      neon msa/;


  # MSE prototypes: each returns unsigned int and also has an sse output
  # pointer. The reference stride parameter is named recon_stride here.
  add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";