aom_dsp_rtcd_defs.pl 103 KB
Newer Older
Yaowu Xu's avatar
Yaowu Xu committed
1 2 3 4 5 6 7 8
# Emits the forward-declaration preamble: a "DSP" banner comment followed by
# three #include directives and a trailing blank line.
# Takes no arguments. The text goes to the currently selected output handle;
# the sub's value is whatever print returns.
sub aom_dsp_forward_decls() {
print <<EOF;
/*
 * DSP
 */

#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/enums.h"

EOF
}
forward_decls qw/aom_dsp_forward_decls/;

# Optimizations that need more than one CPU feature: 'avx2' only when both
# HAVE_AVX2 and HAVE_SSSE3 are configured, otherwise the empty string.
$avx2_ssse3 =
    (aom_config("HAVE_AVX2") eq "yes" && aom_config("HAVE_SSSE3") eq "yes") ? 'avx2' : '';

# Feature names for functions that are 64 bit only: each variable holds its
# feature name when the target arch is x86_64 and is empty otherwise.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
    ($opts{arch} eq "x86_64") ? qw/mmx sse2 ssse3 avx avx2/ : ('') x 5;

# Block edge lengths; 128 is added only with CONFIG_EXT_PARTITION.
@block_widths = (4, 8, 16, 32, 64);
push @block_widths, 128 if aom_config("CONFIG_EXT_PARTITION") eq "yes";

# All [width, height] pairs whose aspect ratio is at most 2:1 either way,
# ordered by width and then by height.
@block_sizes = ();
foreach $w (@block_widths) {
  push @block_sizes,
       map  { [$w, $_] }
       grep { $w <= 2*$_ && $_ <= 2*$w } @block_widths;
}
43
# With CONFIG_EXT_PARTITION_TYPES, also append the 4:1 and 1:4 block shapes.
if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
  push @block_sizes, [4, 16];
  push @block_sizes, [16, 4];
  push @block_sizes, [8, 32];
  push @block_sizes, [32, 8];
  push @block_sizes, [16, 64];
  push @block_sizes, [64, 16];
}
Yaowu Xu's avatar
Yaowu Xu committed
51

52 53 54 55
# Transform edge lengths; '64' is appended only with CONFIG_TX64X64.
@tx_dims = (2, 4, 8, 16, 32);
push @tx_dims, '64' if aom_config("CONFIG_TX64X64") eq "yes";
56

57 58 59 60 61 62 63 64
# Transform sizes as [width, height] pairs. Every dimension contributes its
# square size; dimensions of at least 4 also contribute the 2:1 and 1:2
# rectangles, appended right after the square entry for that width.
@tx_sizes = ();
foreach $w (@tx_dims) {
  push @tx_sizes, [$w, $w];
  next if $w < 4;
  foreach $h (@tx_dims) {
    next if $h < 4;
    push @tx_sizes, [$w, $h] if $w == 2*$h || $h == 2*$w;
  }
}

Urvang Joshi's avatar
Urvang Joshi committed
65 66 67
# Intra predictor names; smooth_v and smooth_h are appended only with
# CONFIG_SMOOTH_HV.
@pred_names = qw/dc dc_top dc_left dc_128 v h d207e d63e d45e d117 d135 d153 paeth smooth/;
if (aom_config("CONFIG_SMOOTH_HV") eq "yes") {
  push @pred_names, qw/smooth_v smooth_h/;
}
69

70 71 72
#
# Intra prediction
#
73

74 75
# Declare one predictor prototype per (transform size, predictor name) pair,
# plus a high-bitdepth counterpart when CONFIG_HIGHBITDEPTH is enabled.
# A named loop variable is used instead of $_ so that nothing called inside
# the loop can clobber the current @tx_sizes element.
foreach my $tx_size (@tx_sizes) {
  ($w, $h) = @$tx_size;
  foreach $pred_name (@pred_names) {
    add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
                "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    }
  }
}
85

86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
# Specializations for the dc_top / dc_left / dc_128 predictors. All three use
# the same per-size architecture lists, so the table is written once and
# applied to each predictor in turn (all dc_top sizes, then dc_left, then
# dc_128).
my @dc_variant_archs = (
  ['4x4',   qw/msa neon sse2/],
  ['4x8',   qw/sse2/],
  ['8x4',   qw/sse2/],
  ['8x8',   qw/neon msa sse2/],
  ['8x16',  qw/sse2/],
  ['16x8',  qw/sse2/],
  ['16x16', qw/neon msa sse2/],
  ['16x32', qw/sse2/],
  ['32x16', qw/sse2/],
  ['32x32', qw/msa neon sse2/],
);
foreach my $dc_kind (qw/dc_top dc_left dc_128/) {
  foreach my $entry (@dc_variant_archs) {
    my ($size, @archs) = @$entry;
    specialize("aom_${dc_kind}_predictor_${size}", @archs);
  }
}
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
# Specializations for the 8-bit V and H predictors. Each entry is
# "function arch ..." and is split on whitespace into one specialize call.
foreach my $spec (
  'aom_v_predictor_4x4 neon msa sse2',
  'aom_v_predictor_4x8 sse2',
  'aom_v_predictor_8x4 sse2',
  'aom_v_predictor_8x8 neon msa sse2',
  'aom_v_predictor_8x16 sse2',
  'aom_v_predictor_16x8 sse2',
  'aom_v_predictor_16x16 neon msa sse2',
  'aom_v_predictor_16x32 sse2',
  'aom_v_predictor_32x16 sse2',
  'aom_v_predictor_32x32 neon msa sse2',
  'aom_h_predictor_4x8 sse2',
  'aom_h_predictor_4x4 neon dspr2 msa sse2',
  'aom_h_predictor_8x4 sse2',
  'aom_h_predictor_8x8 neon dspr2 msa sse2',
  'aom_h_predictor_8x16 sse2',
  'aom_h_predictor_16x8 sse2',
  'aom_h_predictor_16x16 neon dspr2 msa sse2',
  'aom_h_predictor_16x32 sse2',
  'aom_h_predictor_32x16 sse2',
  'aom_h_predictor_32x32 neon msa sse2',
) {
  specialize(split(' ', $spec));
}
136

137
# Specializations for the directional (d63e/d135/d153) and plain dc
# predictors, kept in their original order.
specialize qw/aom_d63e_predictor_4x4 ssse3/;
specialize qw/aom_d135_predictor_4x4 neon/;
specialize qw/aom_d153_predictor_4x4 ssse3/;
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_predictor_4x8 sse2/;
specialize qw/aom_d153_predictor_8x8 ssse3/;
specialize qw/aom_dc_predictor_8x4 sse2/;
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_8x16 sse2/;
specialize qw/aom_d153_predictor_16x16 ssse3/;
specialize qw/aom_dc_predictor_16x8 sse2/;
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_predictor_16x32 sse2/;
specialize qw/aom_d153_predictor_32x32 ssse3/;
specialize qw/aom_dc_predictor_32x16 sse2/;
specialize qw/aom_dc_predictor_32x32 msa neon sse2/;

155
# High-bitdepth predictor specializations. Every entry is specialized for
# sse2 only, so the calls are generated from the size list in the original
# order: v, dc and h are walked predictor-major; dc_left / dc_top / dc_128
# are walked size-major.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  my @highbd_pred_sizes = qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/;

  foreach my $pred (qw/v dc h/) {
    foreach my $size (@highbd_pred_sizes) {
      specialize("aom_highbd_${pred}_predictor_${size}", 'sse2');
    }
  }

  foreach my $size (@highbd_pred_sizes) {
    foreach my $pred (qw/dc_left dc_top dc_128/) {
      specialize("aom_highbd_${pred}_predictor_${size}", 'sse2');
    }
  }
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
217 218 219 220

#
# Sub Pixel Filters
#
Fergus Simpson's avatar
Fergus Simpson committed
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
# Argument list shared by the fixed-step convolve and scaled prototypes.
my $convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
# Same, with the extra subpel_x / subpel_y offsets taken by the *_scale variants.
my $convolve_scale_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";

add_proto qw/void aom_convolve_copy/,             $convolve_args;
add_proto qw/void aom_convolve_avg/,              $convolve_args;
add_proto qw/void aom_convolve8/,                 $convolve_args;
add_proto qw/void aom_convolve8_horiz/,           $convolve_args;
add_proto qw/void aom_convolve8_vert/,            $convolve_args;
add_proto qw/void aom_convolve8_avg/,             $convolve_args;
add_proto qw/void aom_convolve8_avg_horiz/,       $convolve_args;
add_proto qw/void aom_convolve8_avg_vert/,        $convolve_args;
add_proto qw/void aom_scaled_2d/,                 $convolve_args;
add_proto qw/void aom_scaled_horiz/,              $convolve_args;
add_proto qw/void aom_scaled_vert/,               $convolve_args;
add_proto qw/void aom_scaled_avg_2d/,             $convolve_args;
add_proto qw/void aom_scaled_avg_horiz/,          $convolve_args;
add_proto qw/void aom_scaled_avg_vert/,           $convolve_args;

add_proto qw/void aom_convolve8_horiz_scale/,     $convolve_scale_args;
add_proto qw/void aom_convolve8_vert_scale/,      $convolve_scale_args;
add_proto qw/void aom_convolve8_avg_horiz_scale/, $convolve_scale_args;
add_proto qw/void aom_convolve8_avg_vert_scale/,  $convolve_scale_args;
add_proto qw/void aom_convolve8_scale/,           $convolve_scale_args;
add_proto qw/void aom_convolve8_avg_scale/,       $convolve_scale_args;

# $avx2_ssse3 is 'avx2' or the empty string, so the last argument may be empty.
specialize qw/aom_convolve_copy       sse2      /;
specialize qw/aom_convolve_avg        sse2      /;
specialize qw/aom_convolve8           sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_horiz     sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_vert      sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_avg       sse2 ssse3/;
specialize qw/aom_convolve8_avg_horiz sse2 ssse3/;
specialize qw/aom_convolve8_avg_vert  sse2 ssse3/;
specialize qw/aom_scaled_2d                ssse3/;

253 254 255 256
# "add_src" convolve variants, declared only with CONFIG_LOOP_RESTORATION.
# The *_horiz_hip prototype writes to a uint16_t buffer and the *_vert_hip
# prototype reads from one; every other prototype is uint8_t to uint8_t.
if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
  my $filter_args = "const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  my $add_src_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, $filter_args";

  add_proto qw/void aom_convolve8_add_src/,       $add_src_args;
  add_proto qw/void aom_convolve8_add_src_horiz/, $add_src_args;
  add_proto qw/void aom_convolve8_add_src_vert/,  $add_src_args;
  add_proto qw/void aom_convolve8_add_src_hip/,       $add_src_args;
  add_proto qw/void aom_convolve8_add_src_horiz_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, $filter_args";
  add_proto qw/void aom_convolve8_add_src_vert_hip/,  "const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, $filter_args";

  specialize qw/aom_convolve8_add_src ssse3/;
  specialize qw/aom_convolve8_add_src_horiz ssse3/;
  specialize qw/aom_convolve8_add_src_vert ssse3/;
  specialize qw/aom_convolve8_add_src_hip sse2/;
}  # CONFIG_LOOP_RESTORATION

Yaowu Xu's avatar
Yaowu Xu committed
267 268 269 270 271 272 273 274 275 276 277 278
# TODO(any): These need to be extended to up to 128x128 block sizes
# neon / dspr2 / msa convolve specializations are registered unless both
# CONFIG_AV1 and CONFIG_EXT_PARTITION are enabled.
if (aom_config("CONFIG_AV1") ne "yes" || aom_config("CONFIG_EXT_PARTITION") ne "yes") {
  foreach my $convolve_fn (qw/
      aom_convolve_copy
      aom_convolve_avg
      aom_convolve8
      aom_convolve8_horiz
      aom_convolve8_vert
      aom_convolve8_avg
      aom_convolve8_avg_horiz
      aom_convolve8_avg_vert
  /) {
    specialize($convolve_fn, qw/neon dspr2 msa/);
  }
}

279
# High-bitdepth convolve prototypes and specializations.
# $sse2_x86_64 is 'sse2' on x86_64 and empty otherwise, so the sse2 variants
# below are requested only for 64-bit x86 builds.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  # Pieces of the argument list shared by every prototype in this section.
  my $hbd_filter_args = "const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  my $hbd_convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, $hbd_filter_args";

  add_proto qw/void aom_highbd_convolve_copy/, $hbd_convolve_args;
  specialize qw/aom_highbd_convolve_copy sse2 avx2/;

  add_proto qw/void aom_highbd_convolve_avg/, $hbd_convolve_args;
  specialize qw/aom_highbd_convolve_avg sse2 avx2/;

  add_proto qw/void aom_highbd_convolve8/, $hbd_convolve_args;
  specialize qw/aom_highbd_convolve8 avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_horiz/, $hbd_convolve_args;
  specialize qw/aom_highbd_convolve8_horiz avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_vert/, $hbd_convolve_args;
  specialize qw/aom_highbd_convolve8_vert avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg/, $hbd_convolve_args;
  specialize qw/aom_highbd_convolve8_avg avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg_horiz/, $hbd_convolve_args;
  specialize qw/aom_highbd_convolve8_avg_horiz avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg_vert/, $hbd_convolve_args;
  specialize qw/aom_highbd_convolve8_avg_vert avx2/, "$sse2_x86_64";

  if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
    add_proto qw/void aom_highbd_convolve8_add_src/, $hbd_convolve_args;
    add_proto qw/void aom_highbd_convolve8_add_src_horiz/, $hbd_convolve_args;
    add_proto qw/void aom_highbd_convolve8_add_src_vert/, $hbd_convolve_args;
    add_proto qw/void aom_highbd_convolve8_add_src_hip/, $hbd_convolve_args;
    # The *_horiz_hip prototype takes a uint16_t dst and the *_vert_hip
    # prototype takes a uint16_t src.
    add_proto qw/void aom_highbd_convolve8_add_src_horiz_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, $hbd_filter_args";
    add_proto qw/void aom_highbd_convolve8_add_src_vert_hip/, "const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, $hbd_filter_args";

    specialize qw/aom_highbd_convolve8_add_src/, "$sse2_x86_64";
    specialize qw/aom_highbd_convolve8_add_src_hip ssse3/;
    # The _horiz/_vert functions are currently unused, so we don't bother
    # specialising them.
  }  # CONFIG_LOOP_RESTORATION
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
318 319 320 321 322

#
# Loopfilter
#
# 8-bit loop filter prototypes and specializations.
# With CONFIG_PARALLEL_DEBLOCKING only the sse2 versions of the single-edge
# filters are requested and the *_dual filters get no specializations.
# Where a "neon_asm" specialization is requested, the matching
# $<function>_neon_asm variable is set to the name of the *_neon function.
# Those names are written as quoted strings rather than barewords so the
# assignments stay valid under "use strict".

# Argument list for single-edge filters (also used by aom_lpf_vertical_16_dual).
my $lpf_args = "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
# Argument list for dual filters that take two sets of limits.
my $lpf_dual_args = "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";

add_proto qw/void aom_lpf_vertical_16/, $lpf_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_16 sse2/;
} else {
  specialize qw/aom_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
  $aom_lpf_vertical_16_neon_asm = 'aom_lpf_vertical_16_neon';
}

add_proto qw/void aom_lpf_vertical_16_dual/, $lpf_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
  $aom_lpf_vertical_16_dual_neon_asm = 'aom_lpf_vertical_16_dual_neon';
}

add_proto qw/void aom_lpf_vertical_8/, $lpf_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_8 sse2/;
} else {
  specialize qw/aom_lpf_vertical_8 sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_vertical_8_dual/, $lpf_dual_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
  $aom_lpf_vertical_8_dual_neon_asm = 'aom_lpf_vertical_8_dual_neon';
}

add_proto qw/void aom_lpf_vertical_4/, $lpf_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_vertical_4 sse2/;
} else {
  specialize qw/aom_lpf_vertical_4 sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_vertical_4_dual/, $lpf_dual_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_vertical_4_dual sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_horizontal_edge_8/, $lpf_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_edge_8 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
  $aom_lpf_horizontal_edge_8_neon_asm = 'aom_lpf_horizontal_edge_8_neon';
}

add_proto qw/void aom_lpf_horizontal_edge_16/, $lpf_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_edge_16 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
  $aom_lpf_horizontal_edge_16_neon_asm = 'aom_lpf_horizontal_edge_16_neon';
}

add_proto qw/void aom_lpf_horizontal_8/, $lpf_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_8 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_8 sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_horizontal_8_dual/, $lpf_dual_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
  $aom_lpf_horizontal_8_dual_neon_asm = 'aom_lpf_horizontal_8_dual_neon';
}

add_proto qw/void aom_lpf_horizontal_4/, $lpf_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
  specialize qw/aom_lpf_horizontal_4 sse2/;
} else {
  specialize qw/aom_lpf_horizontal_4 sse2 neon dspr2 msa/;
}

add_proto qw/void aom_lpf_horizontal_4_dual/, $lpf_dual_args;
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
  specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
}
Yaowu Xu's avatar
Yaowu Xu committed
401

402
# High-bitdepth loop filter prototypes and specializations. These operate on
# uint16_t samples and take a trailing "int bd" argument.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  # Argument list for single-edge filters (also used by
  # aom_highbd_lpf_vertical_16_dual).
  my $hbd_lpf_args = "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  # Argument list for dual filters that take two sets of limits.
  my $hbd_lpf_dual_args = "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";

  add_proto qw/void aom_highbd_lpf_vertical_16/, $hbd_lpf_args;
  specialize qw/aom_highbd_lpf_vertical_16 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_16_dual/, $hbd_lpf_args;
  specialize qw/aom_highbd_lpf_vertical_16_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_8/, $hbd_lpf_args;
  specialize qw/aom_highbd_lpf_vertical_8 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_8_dual/, $hbd_lpf_dual_args;
  specialize qw/aom_highbd_lpf_vertical_8_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_vertical_4/, $hbd_lpf_args;
  specialize qw/aom_highbd_lpf_vertical_4 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4_dual/, $hbd_lpf_dual_args;
  specialize qw/aom_highbd_lpf_vertical_4_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_edge_8/, $hbd_lpf_args;
  specialize qw/aom_highbd_lpf_horizontal_edge_8 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_edge_16/, $hbd_lpf_args;
  specialize qw/aom_highbd_lpf_horizontal_edge_16 sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8/, $hbd_lpf_args;
  specialize qw/aom_highbd_lpf_horizontal_8 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, $hbd_lpf_dual_args;
  specialize qw/aom_highbd_lpf_horizontal_8_dual sse2 avx2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4/, $hbd_lpf_args;
  specialize qw/aom_highbd_lpf_horizontal_4 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, $hbd_lpf_dual_args;
  specialize qw/aom_highbd_lpf_horizontal_4_dual sse2 avx2/;
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
439 440 441 442 443 444 445 446

#
# Encoder functions.
#

#
# Forward transform
#
# The aom_fdct* prototypes are the same in both CONFIG_HIGHBITDEPTH branches;
# the branches differ in their specialization lists, and the high-bitdepth
# branch additionally declares the aom_highbd_fdct* entry points.
# $ssse3_x86_64 is 'ssse3' on x86_64 targets and '' otherwise.
if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq "yes")){
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4 sse2/;

    add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4_1 sse2/;

    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";

    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16 sse2/;

    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32 sse2 avx2/;

    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_rd sse2 avx2/;

    # High bit depth
    add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct4x4 sse2/;

    add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct8x8 sse2/;

    add_proto qw/void aom_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct16x16 sse2/;

    add_proto qw/void aom_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32 sse2/;

    add_proto qw/void aom_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32_rd sse2/;

  } else {
    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4 sse2 msa/;

    add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4_1 sse2/;

    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";

    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16 sse2 msa/;

    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32 sse2 avx2 msa/;

    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
  }  # CONFIG_HIGHBITDEPTH
}  # CONFIG_AV1_ENCODER || CONFIG_PVQ

#
# Inverse transform
#
# The first group of declarations applies to every CONFIG_AV1 build. When
# CONFIG_HIGHBITDEPTH is not "yes", the same functions are declared a second
# time below with wider specialization lists (neon/dspr2/msa).
# NOTE(review): presumably the later declaration supersedes the earlier one;
# confirm against the add_proto/specialize implementation.
if (aom_config("CONFIG_AV1") eq "yes") {
  add_proto qw/void aom_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

  add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_iwht4x4_16_add sse2/;

  add_proto qw/void aom_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

  add_proto qw/void aom_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

  add_proto qw/void aom_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct4x4_16_add sse2/;

  add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct4x4_1_add sse2/;

  add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_64_add sse2 ssse3/;

  add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_12_add sse2 ssse3/;

  add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct8x8_1_add sse2/;

  add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_256_add sse2 avx2/;

  add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_38_add avx2/;

  add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_10_add sse2 avx2/;

  add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct16x16_1_add sse2 avx2/;

  add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2/;

  add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2/;
  # Need to add 135 eob idct32x32 implementations.
  # Until then the sse2 variant is aliased to the full 1024-coefficient
  # routine. The name is quoted so it does not rely on bareword-to-string
  # conversion; the assignment must stay after the specialize call above.
  $aom_idct32x32_135_add_sse2 = 'aom_idct32x32_1024_add_sse2';

  add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2/;

  add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_idct32x32_1_add sse2 avx2/;

  # Low-bitdepth-only builds: re-declare with the additional non-x86
  # specializations.
  if (aom_config("CONFIG_HIGHBITDEPTH") ne "yes") {
    add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct4x4_1_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct4x4_16_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct8x8_1_add sse2 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct8x8_64_add sse2 ssse3 neon dspr2 msa/;

    add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct8x8_12_add sse2 ssse3 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct16x16_1_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct16x16_256_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct16x16_38_add avx2/;

    add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct16x16_10_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2 neon dspr2 msa/;

    add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2 neon dspr2 msa/;
    # Need to add 135 eob idct32x32 implementations.
    $aom_idct32x32_135_add_sse2  = 'aom_idct32x32_1024_add_sse2';
    $aom_idct32x32_135_add_neon  = 'aom_idct32x32_1024_add_neon';
    $aom_idct32x32_135_add_dspr2 = 'aom_idct32x32_1024_add_dspr2';
    $aom_idct32x32_135_add_msa   = 'aom_idct32x32_1024_add_msa';

    add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2 neon dspr2 msa/;
    # Need to add 34 eob idct32x32 neon implementation.
    $aom_idct32x32_34_add_neon = 'aom_idct32x32_1024_add_neon';

    add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct32x32_1_add sse2 avx2 neon dspr2 msa/;

    add_proto qw/void aom_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_iwht4x4_1_add msa/;

    add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_iwht4x4_16_add msa sse2/;
  }  # !CONFIG_HIGHBITDEPTH
}  # CONFIG_AV1

#
# Quantization
#
# All three quantizers share one argument list. $ssse3_x86_64 and
# $avx_x86_64 are empty strings unless the target arch is x86_64.
# aom_quantize_b_64x64 has no specialization listed here.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";

  add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";

  add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
}  # CONFIG_AV1_ENCODER

# High-bitdepth quantizers. The argument list matches the aom_quantize_b*
# family. This block is guarded only by CONFIG_AV1_ENCODER.
# NOTE(review): there is no CONFIG_HIGHBITDEPTH check here - confirm that is
# intended.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b sse2 avx2/;

  add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  specialize qw/aom_highbd_quantize_b_32x32 sse2/;

  add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

}  # CONFIG_AV1_ENCODER
if (aom_config("CONFIG_AV1") eq "yes") {
  #
  # Alpha blending with mask
  #
  # The 32-bit variant is only declared when CONFIG_CONVOLVE_ROUND is on and
  # has no specialization listed here.
  if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
    add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
  }
  add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
  add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
  add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
  specialize "aom_blend_a64_mask", qw/sse4_1/;
  specialize "aom_blend_a64_hmask", qw/sse4_1/;
  specialize "aom_blend_a64_vmask", qw/sse4_1/;

  # High-bitdepth counterparts take the same arguments plus a trailing bit
  # depth.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd";
    add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
    add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
    specialize "aom_highbd_blend_a64_mask", qw/sse4_1/;
    specialize "aom_highbd_blend_a64_hmask", qw/sse4_1/;
    specialize "aom_highbd_blend_a64_vmask", qw/sse4_1/;
  }
}  # CONFIG_AV1

if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  #
  # Block subtraction
  #
  add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
  specialize qw/aom_subtract_block neon msa sse2/;

  # NOTE(review): this guard repeats the condition of the enclosing block, so
  # it looks redundant; kept as-is pending confirmation.
  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    #
    # Sum of Squares
    #
    add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
    specialize qw/aom_sum_squares_2d_i16 sse2/;

    add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
    specialize qw/aom_sum_squares_i16 sse2/;
  }

  #
  # Avg
  #
  # NOTE(review): this guard repeats the condition of the enclosing block, so
  # it looks redundant; kept as-is pending confirmation.
  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    # NOTE(review): no add_proto for aom_avg_8x8 is visible next to this
    # specialize call - confirm the prototype is declared elsewhere, otherwise
    # this line is dead.
    specialize qw/aom_avg_8x8 sse2 neon msa/;
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
      specialize qw/aom_highbd_subtract_block sse2/;
    }

    #
    # Minmax
    #
    add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
    specialize qw/aom_minmax_8x8 sse2 neon/;
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
    }

    # $ssse3_x86_64 is 'ssse3' on x86_64 targets and '' otherwise.
    add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
    specialize qw/aom_hadamard_8x8 sse2 neon/, "$ssse3_x86_64";

    add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, int src_stride, int16_t *coeff";
    specialize qw/aom_hadamard_16x16 sse2 neon/;

    add_proto qw/int aom_satd/, "const int16_t *coeff, int length";
    specialize qw/aom_satd sse2 neon/;

    add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
    specialize qw/aom_int_pro_row sse2 neon/;

    add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
    specialize qw/aom_int_pro_col sse2 neon/;

    add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
    specialize qw/aom_vector_var neon sse2/;
  }  # CONFIG_AV1_ENCODER

  #
  # Single block SAD / Single block Avg SAD
  #
  # One plain and one "_avg" prototype is declared per entry of @block_sizes;
  # the per-size specialization lists follow the loop.
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
  }

  specialize qw/aom_sad128x128    avx2          sse2/;
  specialize qw/aom_sad128x64     avx2          sse2/;
  specialize qw/aom_sad64x128     avx2          sse2/;
  specialize qw/aom_sad64x64      avx2 neon msa sse2/;
  specialize qw/aom_sad64x32      avx2      msa sse2/;
  specialize qw/aom_sad32x64      avx2      msa sse2/;
  specialize qw/aom_sad32x32      avx2 neon msa sse2/;
  specialize qw/aom_sad32x16      avx2      msa sse2/;
  specialize qw/aom_sad16x32                msa sse2/;
  specialize qw/aom_sad16x16           neon msa sse2/;
  specialize qw/aom_sad16x8            neon msa sse2/;
  specialize qw/aom_sad8x16            neon msa sse2/;
  specialize qw/aom_sad8x8             neon msa sse2/;
  specialize qw/aom_sad8x4                  msa sse2/;
  specialize qw/aom_sad4x8                  msa sse2/;
  specialize qw/aom_sad4x4             neon msa sse2/;

  specialize qw/aom_sad128x128_avg avx2     sse2/;
  specialize qw/aom_sad128x64_avg  avx2     sse2/;
  specialize qw/aom_sad64x128_avg  avx2     sse2/;
  specialize qw/aom_sad64x64_avg   avx2 msa sse2/;
  specialize qw/aom_sad64x32_avg   avx2 msa sse2/;
  specialize qw/aom_sad32x64_avg   avx2 msa sse2/;
  specialize qw/aom_sad32x32_avg   avx2 msa sse2/;
  specialize qw/aom_sad32x16_avg   avx2 msa sse2/;
  specialize qw/aom_sad16x32_avg        msa sse2/;
  specialize qw/aom_sad16x16_avg        msa sse2/;
  specialize qw/aom_sad16x8_avg         msa sse2/;
  specialize qw/aom_sad8x16_avg         msa sse2/;
  specialize qw/aom_sad8x8_avg          msa sse2/;
  specialize qw/aom_sad8x4_avg          msa sse2/;
  specialize qw/aom_sad4x8_avg          msa sse2/;
  specialize qw/aom_sad4x4_avg          msa sse2/;

  # High-bitdepth single block SAD / Avg SAD.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
      # sse2 is listed for every size except those with a 128 dimension or a
      # width of 4.
      if ($w != 128 && $h != 128 && $w != 4) {
        specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
        specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
      }
    }
    # NOTE(review): these avx2 entries are presumably added on top of the sse2
    # ones registered in the loop - confirm that repeated specialize calls
    # accumulate.
    specialize qw/aom_highbd_sad128x128 avx2/;
    specialize qw/aom_highbd_sad128x64  avx2/;
    specialize qw/aom_highbd_sad64x128  avx2/;
    specialize qw/aom_highbd_sad64x64   avx2/;
    specialize qw/aom_highbd_sad64x32   avx2/;
    specialize qw/aom_highbd_sad32x64   avx2/;
    specialize qw/aom_highbd_sad32x32   avx2/;
    specialize qw/aom_highbd_sad32x16   avx2/;
    specialize qw/aom_highbd_sad16x32   avx2/;
    specialize qw/aom_highbd_sad16x16   avx2/;
    specialize qw/aom_highbd_sad16x8    avx2/;

    specialize qw/aom_highbd_sad128x128_avg avx2/;
    specialize qw/aom_highbd_sad128x64_avg  avx2/;
    specialize qw/aom_highbd_sad64x128_avg  avx2/;
    specialize qw/aom_highbd_sad64x64_avg   avx2/;
    specialize qw/aom_highbd_sad64x32_avg   avx2/;
    specialize qw/aom_highbd_sad32x64_avg   avx2/;
    specialize qw/aom_highbd_sad32x32_avg   avx2/;
    specialize qw/aom_highbd_sad32x16_avg   avx2/;
    specialize qw/aom_highbd_sad16x32_avg   avx2/;
    specialize qw/aom_highbd_sad16x16_avg   avx2/;
    specialize qw/aom_highbd_sad16x8_avg    avx2/;
  }

  #
  # Masked SAD
  #
  # Declared for every entry of @block_sizes, each with an ssse3
  # specialization; the high-bitdepth variants mirror the 8-bit ones.
  if (aom_config("CONFIG_EXT_INTER") eq "yes") {
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
      specialize "aom_masked_sad${w}x${h}", qw/ssse3/;
    }

    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      foreach (@block_sizes) {
        ($w, $h) = @$_;
        add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
        specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3/;
      }
    }
  }

  #
  # OBMC SAD
  #
  # Declared for every entry of @block_sizes, each with an sse4_1
  # specialization; the high-bitdepth variants use the same argument list.
  if (aom_config("CONFIG_MOTION_VAR") eq "yes") {
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
      specialize "aom_obmc_sad${w}x${h}", qw/sse4_1/;
    }

    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      foreach (@block_sizes) {
        ($w, $h) = @$_;
        add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
        specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
      }
    }
  }

  #
  # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
  #
  # Square sizes come from @block_widths; the few rectangular sizes are
  # declared individually after each loop.
  # Blocks of 3
  foreach $s (@block_widths) {
    add_proto qw/void/, "aom_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }
  specialize qw/aom_sad64x64x3            msa/;
  specialize qw/aom_sad32x32x3            msa/;
  specialize qw/aom_sad16x16x3 sse3 ssse3 msa/;
  specialize qw/aom_sad8x8x3   sse3       msa/;
  specialize qw/aom_sad4x4x3   sse3       msa/;

  add_proto qw/void/, "aom_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad16x8x3 sse3 ssse3 msa/;
  add_proto qw/void/, "aom_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x16x3 sse3 msa/;

  # Blocks of 8
  foreach $s (@block_widths) {
    add_proto qw/void/, "aom_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }
  specialize qw/aom_sad64x64x8        msa/;
  specialize qw/aom_sad32x32x8        msa/;
  specialize qw/aom_sad16x16x8 sse4_1 msa/;
  specialize qw/aom_sad8x8x8   sse4_1 msa/;
  specialize qw/aom_sad4x4x8   sse4_1 msa/;

  add_proto qw/void/, "aom_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad16x8x8 sse4_1 msa/;
  add_proto qw/void/, "aom_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x16x8 sse4_1 msa/;
  add_proto qw/void/, "aom_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad8x4x8 msa/;
  add_proto qw/void/, "aom_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize qw/aom_sad4x8x8 msa/;

  # High-bitdepth multi-block SAD (x3 / x8). Only prototypes are declared;
  # no specializations are listed for these.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    foreach $s (@block_widths) {
      # Blocks of 3
      add_proto qw/void/, "aom_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
      # Blocks of 8
      add_proto qw/void/, "aom_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    }
    # Blocks of 3
    add_proto qw/void/, "aom_highbd_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    # Blocks of 8
    add_proto qw/void/, "aom_highbd_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    add_proto qw/void/, "aom_highbd_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }

  #
  # Multi-block SAD, comparing a reference to N independent blocks
  #
  # One x4d prototype per entry of @block_sizes; ref_ptr is an array of
  # reference pointers rather than a single pointer.
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
  }

  specialize qw/aom_sad128x128x4d avx2          sse2/;
  specialize qw/aom_sad128x64x4d  avx2          sse2/;
  specialize qw/aom_sad64x128x4d  avx2          sse2/;
  specialize qw/aom_sad64x64x4d   avx2 neon msa sse2/;
  specialize qw/aom_sad64x32x4d   avx2      msa sse2/;
  specialize qw/aom_sad32x64x4d   avx2      msa sse2/;
  specialize qw/aom_sad32x32x4d   avx2 neon msa sse2/;
  specialize qw/aom_sad32x16x4d             msa sse2/;
  specialize qw/aom_sad16x32x4d             msa sse2/;
  specialize qw/aom_sad16x16x4d        neon msa sse2/;
  specialize qw/aom_sad16x8x4d              msa sse2/;
  specialize qw/aom_sad8x16x4d              msa sse2/;
  specialize qw/aom_sad8x8x4d               msa sse2/;
  specialize qw/aom_sad8x4x4d               msa sse2/;
  specialize qw/aom_sad4x8x4d               msa sse2/;
  specialize qw/aom_sad4x4x4d               msa sse2/;

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    #
    # Multi-block SAD, comparing a reference to N independent blocks
    #
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
      # sse2 is listed for every size that has no 128 dimension.
      if ($w != 128 && $h != 128) {
        specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
      }
    }
    # NOTE(review): these avx2 entries are presumably added on top of the sse2
    # ones registered in the loop - confirm that repeated specialize calls
    # accumulate.
    specialize qw/aom_highbd_sad128x128x4d avx2/;
    specialize qw/aom_highbd_sad128x64x4d  avx2/;
    specialize qw/aom_highbd_sad64x128x4d  avx2/;
    specialize qw/aom_highbd_sad64x64x4d   avx2/;
    specialize qw/aom_highbd_sad64x32x4d   avx2/;
    specialize qw/aom_highbd_sad32x64x4d   avx2/;
    specialize qw/aom_highbd_sad32x32x4d   avx2/;
    specialize qw/aom_highbd_sad32x16x4d   avx2/;
    specialize qw/aom_highbd_sad16x32x4d   avx2/;
    specialize qw/aom_highbd_sad16x16x4d   avx2/;
    specialize qw/aom_highbd_sad16x8x4d    avx2/;
  }

  #
  # Structured Similarity (SSIM)
  #
  # Only declared when CONFIG_INTERNAL_STATS is enabled.
  if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
    # $sse2_x86_64 is 'sse2' when $opts{arch} is "x86_64" and the empty string
    # otherwise, so the sse2 variants are requested on 64-bit x86 only.
    add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";

    add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64";

    # The high-bitdepth 8x8 variant takes uint16_t samples and has no
    # specialization here.
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
    }
  }
}  # CONFIG_AV1_ENCODER
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  #
  # Specialty Variance
  #
  # Both functions return void and take sse and sum as pointer parameters
  # (presumably outputs -- confirm against the C implementation).
  add_proto qw/void aom_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";

  add_proto qw/void aom_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";

  # Only the 16x16 version has an avx2 variant.
  specialize qw/aom_get16x16var sse2 avx2 neon msa/;
  specialize qw/aom_get8x8var   sse2      neon msa/;

  # aom_mse<W>x<H>: four block sizes sharing one parameter list.
  add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";

  # Only 16x16 has avx2 and neon variants; the other sizes use sse2 and msa.
  specialize qw/aom_mse16x16          sse2 avx2 neon msa/;
  specialize qw/aom_mse16x8           sse2           msa/;
  specialize qw/aom_mse8x16           sse2           msa/;
  specialize qw/aom_mse8x8            sse2           msa/;

  # High-bitdepth get-var and MSE functions, declared once per bit depth
  # (8, 10 and 12) with the depth embedded in the function name.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    foreach $bd (8, 10, 12) {
      add_proto qw/void/, "aom_highbd_${bd}_get16x16var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
      add_proto qw/void/, "aom_highbd_${bd}_get8x8var", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";

      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";

      # Only the square MSE sizes are specialized (sse2); the get-var
      # functions and the 16x8 / 8x16 MSE functions get no specialization here.
      specialize "aom_highbd_${bd}_mse16x16", qw/sse2/;
      specialize "aom_highbd_${bd}_mse8x8", qw/sse2/;
    }
  }

  #
  # Upsampled prediction (aom_upsampled_pred / aom_comp_avg_upsampled_pred)
  #
  # The comp_avg variant takes one extra input, pred; both have an sse2
  # variant.
  add_proto qw/void aom_upsampled_pred/, "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
  specialize qw/aom_upsampled_pred sse2/;
  add_proto qw/void aom_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
  specialize qw/aom_comp_avg_upsampled_pred sse2/;

  # High-bitdepth versions of the upsampled prediction functions: comp_pred is
  # uint16_t and an extra trailing "int bd" parameter is added.
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
    specialize qw/aom_highbd_upsampled_pred sse2/;
    add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
    specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
  }

  #
  # Miscellaneous: aom_get_mb_ss and aom_get4x4sse_cs
  #
  # aom_get_mb_ss is declared with a single unnamed "const int16_t *"
  # parameter.
  add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";
  add_proto qw/unsigned int aom_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";

  specialize qw/aom_get_mb_ss sse2 msa/;
  specialize qw/aom_get4x4sse_cs neon msa/;

  #
  # Variance / Subpixel Variance / Subpixel Avg Variance
  #
  # The 2x2, 2x4 and 4x2 variance functions are declared explicitly here,
  # separately from the @block_sizes loop that follows.
  add_proto qw/unsigned int/, "aom_variance2x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";

  add_proto qw/unsigned int/, "aom_variance2x4", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";

  add_proto qw/unsigned int/, "aom_variance4x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";

  # For every [width, height] pair in @block_sizes, declare the variance,
  # sub-pixel variance and sub-pixel average variance functions. The sub-pixel
  # versions add xoffset/yoffset, and the avg version adds second_pred.
  foreach (@block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
    add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
    add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
  }

  # Optimized variants of the aom_variance<W>x<H> functions. Each row is the
  # function name followed by the instruction-set extensions passed to
  # specialize for it.
  foreach my $variance_spec (
    [qw/aom_variance64x64     sse2 avx2 neon msa/],
    [qw/aom_variance64x32     sse2 avx2 neon msa/],
    [qw/aom_variance32x64     sse2      neon msa/],
    [qw/aom_variance32x32     sse2 avx2 neon msa/],
    [qw/aom_variance32x16     sse2 avx2      msa/],
    [qw/aom_variance16x32     sse2           msa/],
    [qw/aom_variance16x16     sse2 avx2 neon msa/],
    [qw/aom_variance16x8      sse2      neon msa/],
    [qw/aom_variance8x16      sse2      neon msa/],
    [qw/aom_variance8x8       sse2      neon msa/],
    [qw/aom_variance8x4       sse2           msa/],
    [qw/aom_variance4x8       sse2           msa/],
    [qw/aom_variance4x4       sse2           msa/],
  ) {
    specialize(@$variance_spec);
  }

  # Optimized variants of the aom_sub_pixel_variance<W>x<H> functions, one
  # line per block size, listing the instruction-set extensions for each.
  specialize qw/aom_sub_pixel_variance64x64     avx2 neon msa sse2 ssse3/;
  specialize qw/aom_sub_pixel_variance64x32               msa sse2 ssse3/;
  specialize qw/aom_sub_pixel_variance32x64               msa sse2 ssse3/;
  specialize qw/aom_sub_pixel_variance32x32     avx2 neon msa sse2 ssse3/;
  specialize qw/aom_sub_pixel_variance32x16               msa sse2 ssse3/;
  specialize qw/aom_sub_pixel_variance16x32               msa sse2 ssse3/;