# aom_dsp_rtcd_defs.pl
# Prints the C preamble for the DSP declarations: a banner comment followed by
# the #include lines below.  Only the heredoc text is emitted, so nothing but
# valid C may appear between the EOF markers.
sub aom_dsp_forward_decls() {
print <<EOF
/*
 * DSP
 */

#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/enums.h"

EOF
}
# Hand the sub's name to forward_decls (provided by the including script).
forward_decls qw/aom_dsp_forward_decls/;

# Optimizations that depend on more than one feature: 'avx2' is only offered
# when both HAVE_AVX2 and HAVE_SSSE3 are "yes", otherwise the name is empty.
$avx2_ssse3 =
    ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes"))
    ? 'avx2' : '';

# Instruction-set names for functions that are 64 bit only.  Each one is the
# empty string unless the target architecture is x86_64.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
    ($opts{arch} eq "x86_64") ? qw/mmx sse2 ssse3 avx avx2/ : ('') x 5;

# Block dimensions; 128 is appended only with CONFIG_EXT_PARTITION.
@block_widths = (4, 8, 16, 32, 64);
push @block_widths, 128 if (aom_config("CONFIG_EXT_PARTITION") eq "yes");

# Every [width, height] pair in which neither side is more than twice the
# other, in width-major order.
@block_sizes = ();
foreach my $bw (@block_widths) {
  foreach my $bh (@block_widths) {
    next if ($bw > 2*$bh || $bh > 2*$bw);
    push @block_sizes, [$bw, $bh];
  }
}
# CONFIG_EXT_PARTITION_TYPES adds the 1:4 and 4:1 block shapes.  The setting is
# compared against "yes", like every other aom_config() test in this file, so
# that a non-empty value such as "no" is not treated as enabled.
if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
  push @block_sizes, [4, 16];
  push @block_sizes, [16, 4];
  push @block_sizes, [8, 32];
  push @block_sizes, [32, 8];
}

# Transform dimensions; 64 is appended only with CONFIG_TX64X64.
@tx_dims = (2, 4, 8, 16, 32);
if (aom_config("CONFIG_TX64X64") eq "yes") {
  push @tx_dims, 64;
}

# Transform sizes as [width, height] pairs: each square size, followed by the
# 2:1 and 1:2 rectangles of that width whose sides are both at least 4.
@tx_sizes = ();
foreach $w (@tx_dims) {
  push @tx_sizes, [$w, $w];
  foreach $h (@tx_dims) {
    push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 2*$h || $h == 2*$w));
  }
}

# Intra predictor names.  With CONFIG_ALT_INTRA the list gains 'paeth' and
# 'smooth' (plus 'smooth_v' and 'smooth_h' under CONFIG_SMOOTH_HV); without it
# the list gains 'tm' instead.
@pred_names = qw/dc dc_top dc_left dc_128 v h d207e d63e d45e d117 d135 d153/;
if (aom_config("CONFIG_ALT_INTRA") eq "yes") {
  push @pred_names, qw/paeth smooth/;
  if (aom_config("CONFIG_SMOOTH_HV") eq "yes") {
    push @pred_names, qw/smooth_v smooth_h/;
  }
} else {
  push @pred_names, 'tm';
}

#
# Intra prediction
#

# Declare aom_<pred>_predictor_<w>x<h> for every entry of @tx_sizes combined
# with every entry of @pred_names, and the aom_highbd_* counterpart (which
# takes 16-bit pixels and an extra bit depth argument) when
# CONFIG_HIGHBITDEPTH is "yes".
foreach (@tx_sizes) {
  # $w and $h are package globals, as elsewhere in this file.
  ($w, $h) = @$_;
  foreach $pred_name (@pred_names) {
    add_proto "void", "aom_${pred_name}_predictor_${w}x${h}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
      add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}",
                "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    }
  }
}

# Specializations for the 8-bit intra predictors, listed by block size.
# The 'tm' predictor only exists when CONFIG_ALT_INTRA is not "yes" (that is
# the condition under which 'tm' is added to @pred_names), so its
# specializations use the same test rather than comparing against "".

# 4x4
specialize qw/aom_d63e_predictor_4x4 ssse3/;
specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
specialize qw/aom_d135_predictor_4x4 neon/;
specialize qw/aom_d153_predictor_4x4 ssse3/;
specialize qw/aom_v_predictor_4x4 neon msa sse2/;
if (aom_config("CONFIG_ALT_INTRA") ne "yes") {
  specialize qw/aom_tm_predictor_4x4 neon dspr2 msa sse2/;
}  # !CONFIG_ALT_INTRA
specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;

# 8x8
specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
specialize qw/aom_d153_predictor_8x8 ssse3/;
specialize qw/aom_v_predictor_8x8 neon msa sse2/;
if (aom_config("CONFIG_ALT_INTRA") ne "yes") {
  specialize qw/aom_tm_predictor_8x8 neon dspr2 msa sse2/;
}  # !CONFIG_ALT_INTRA
specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;

# 16x16
specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
specialize qw/aom_d153_predictor_16x16 ssse3/;
specialize qw/aom_v_predictor_16x16 neon msa sse2/;
if (aom_config("CONFIG_ALT_INTRA") ne "yes") {
  specialize qw/aom_tm_predictor_16x16 neon msa sse2/;
}  # !CONFIG_ALT_INTRA
specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;

# 32x32
specialize qw/aom_h_predictor_32x32 neon msa sse2/;
specialize qw/aom_d153_predictor_32x32 ssse3/;
specialize qw/aom_v_predictor_32x32 neon msa sse2/;
if (aom_config("CONFIG_ALT_INTRA") ne "yes") {
  specialize qw/aom_tm_predictor_32x32 neon msa sse2/;
}  # !CONFIG_ALT_INTRA
specialize qw/aom_dc_predictor_32x32 msa neon sse2/;
specialize qw/aom_dc_top_predictor_32x32 msa neon sse2/;
specialize qw/aom_dc_left_predictor_32x32 msa neon sse2/;
specialize qw/aom_dc_128_predictor_32x32 msa neon sse2/;

# Specializations for the high bit depth intra predictors.  The 'tm' entries
# are limited to builds where CONFIG_ALT_INTRA is not "yes", which is the
# condition under which the 'tm' predictor is declared at all.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  specialize qw/aom_highbd_v_predictor_4x4 sse2/;
  if (aom_config("CONFIG_ALT_INTRA") ne "yes") {
    specialize qw/aom_highbd_tm_predictor_4x4 sse2/;
  }  # !CONFIG_ALT_INTRA
  specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
  specialize qw/aom_highbd_v_predictor_8x8 sse2/;
  if (aom_config("CONFIG_ALT_INTRA") ne "yes") {
    specialize qw/aom_highbd_tm_predictor_8x8 sse2/;
  }  # !CONFIG_ALT_INTRA
  specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
  specialize qw/aom_highbd_v_predictor_16x16 sse2/;
  if (aom_config("CONFIG_ALT_INTRA") ne "yes") {
    specialize qw/aom_highbd_tm_predictor_16x16 sse2/;
  }  # !CONFIG_ALT_INTRA
  specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
  specialize qw/aom_highbd_v_predictor_32x32 sse2/;
  if (aom_config("CONFIG_ALT_INTRA") ne "yes") {
    specialize qw/aom_highbd_tm_predictor_32x32 sse2/;
  }  # !CONFIG_ALT_INTRA
  specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
}  # CONFIG_HIGHBITDEPTH

#
# Sub Pixel Filters
#

# Convolve and scaled-prediction entry points.  All of them share one argument
# list: source and destination buffers with strides, the x/y filters with
# their q4 step sizes, and the block width and height.
add_proto qw/void aom_convolve_copy/,             "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve_avg/,              "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8/,                 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_horiz/,           "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_vert/,            "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg/,             "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_horiz/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_vert/,        "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_2d/,                 "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_horiz/,              "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_vert/,               "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_2d/,             "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_horiz/,          "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_scaled_avg_vert/,           "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

# The *_scale variants take the same arguments plus subpel_x and subpel_y.
add_proto qw/void aom_convolve8_horiz_scale/,     "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_vert_scale/,      "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_horiz_scale/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_vert_scale/,  "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_scale/,           "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_avg_scale/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int subpel_x, int x_step_q4, const int16_t *filter_y, int subpel_y, int y_step_q4, int w, int h";

# x86 specializations.  $avx2_ssse3 is 'avx2' or empty, so the avx2 variant is
# only listed when both AVX2 and SSSE3 are configured.
specialize qw/aom_convolve_copy       sse2      /;
specialize qw/aom_convolve_avg        sse2      /;
specialize qw/aom_convolve8           sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_horiz     sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_vert      sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_avg       sse2 ssse3/;
specialize qw/aom_convolve8_avg_horiz sse2 ssse3/;
specialize qw/aom_convolve8_avg_vert  sse2 ssse3/;
specialize qw/aom_scaled_2d                ssse3/;

# Convolve variants declared only with CONFIG_LOOP_RESTORATION.  Note the two
# asymmetric *_hip signatures: the horizontal pass writes a uint16_t
# destination and the vertical pass reads a uint16_t source.
if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
  add_proto qw/void aom_convolve8_add_src/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_vert/,  "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_hip/,       "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_horiz_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
  add_proto qw/void aom_convolve8_add_src_vert_hip/,  "const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

  specialize qw/aom_convolve8_add_src ssse3/;
  specialize qw/aom_convolve8_add_src_horiz ssse3/;
  specialize qw/aom_convolve8_add_src_vert ssse3/;
  specialize qw/aom_convolve8_add_src_hip sse2/;
}  # CONFIG_LOOP_RESTORATION

# TODO(any): These need to be extended to up to 128x128 block sizes
# The neon/dspr2/msa convolve variants are listed unless both CONFIG_AV1 and
# CONFIG_EXT_PARTITION are "yes".
unless (aom_config("CONFIG_AV1") eq "yes" && aom_config("CONFIG_EXT_PARTITION") eq "yes") {
  foreach my $convolve_fn (qw/aom_convolve_copy
                              aom_convolve_avg
                              aom_convolve8
                              aom_convolve8_horiz
                              aom_convolve8_vert
                              aom_convolve8_avg
                              aom_convolve8_avg_horiz
                              aom_convolve8_avg_vert/) {
    specialize($convolve_fn, qw/neon dspr2 msa/);
  }
}

# High bit depth convolve entry points.  They take the same arguments as the
# 8-bit versions plus a trailing "int bps".  $sse2_x86_64 is 'sse2' on x86_64
# and empty elsewhere, so those sse2 variants are 64-bit only.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void aom_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/aom_highbd_convolve_copy sse2 avx2/;

  add_proto qw/void aom_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/aom_highbd_convolve_avg sse2 avx2/;

  add_proto qw/void aom_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/aom_highbd_convolve8 avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/aom_highbd_convolve8_horiz avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/aom_highbd_convolve8_vert avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/aom_highbd_convolve8_avg avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/aom_highbd_convolve8_avg_horiz avx2/, "$sse2_x86_64";

  add_proto qw/void aom_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/aom_highbd_convolve8_avg_vert avx2/, "$sse2_x86_64";

  if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
    add_proto qw/void aom_highbd_convolve8_add_src/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
    add_proto qw/void aom_highbd_convolve8_add_src_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
    add_proto qw/void aom_highbd_convolve8_add_src_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
    add_proto qw/void aom_highbd_convolve8_add_src_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
    add_proto qw/void aom_highbd_convolve8_add_src_horiz_hip/, "const uint8_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
    add_proto qw/void aom_highbd_convolve8_add_src_vert_hip/, "const uint16_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";

    specialize qw/aom_highbd_convolve8_add_src/, "$sse2_x86_64";
    specialize qw/aom_highbd_convolve8_add_src_hip ssse3/;
    # The _horiz/_vert functions are currently unused, so we don't bother
    # specialising them.
  }  # CONFIG_LOOP_RESTORATION
}  # CONFIG_HIGHBITDEPTH

#
# Loopfilter
#

# Two argument lists cover every 8-bit loop filter: one set of
# blimit/limit/thresh pointers, or two sets (suffix 0 and 1).  The strings are
# never modified after this point.
my $lpf_args = "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
my $lpf_two_set_args = "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";

# NOTE(review): each $..._neon_asm assignment below presumably points the
# neon_asm specialization at the *_neon symbol name; confirm against the
# generator script.

# Vertical filters.
add_proto qw/void aom_lpf_vertical_16/, $lpf_args;
specialize qw/aom_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_16_neon_asm = 'aom_lpf_vertical_16_neon';

add_proto qw/void aom_lpf_vertical_16_dual/, $lpf_args;
specialize qw/aom_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_16_dual_neon_asm = 'aom_lpf_vertical_16_dual_neon';

add_proto qw/void aom_lpf_vertical_8/, $lpf_args;
specialize qw/aom_lpf_vertical_8 sse2 neon dspr2 msa/;

add_proto qw/void aom_lpf_vertical_8_dual/, $lpf_two_set_args;
specialize qw/aom_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_vertical_8_dual_neon_asm = 'aom_lpf_vertical_8_dual_neon';

add_proto qw/void aom_lpf_vertical_4/, $lpf_args;
specialize qw/aom_lpf_vertical_4 sse2 neon dspr2 msa/;

add_proto qw/void aom_lpf_vertical_4_dual/, $lpf_two_set_args;
specialize qw/aom_lpf_vertical_4_dual sse2 neon dspr2 msa/;

# Horizontal filters.
add_proto qw/void aom_lpf_horizontal_edge_8/, $lpf_args;
specialize qw/aom_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_edge_8_neon_asm = 'aom_lpf_horizontal_edge_8_neon';

add_proto qw/void aom_lpf_horizontal_edge_16/, $lpf_args;
specialize qw/aom_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_edge_16_neon_asm = 'aom_lpf_horizontal_edge_16_neon';

add_proto qw/void aom_lpf_horizontal_8/, $lpf_args;
specialize qw/aom_lpf_horizontal_8 sse2 neon dspr2 msa/;

add_proto qw/void aom_lpf_horizontal_8_dual/, $lpf_two_set_args;
specialize qw/aom_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
$aom_lpf_horizontal_8_dual_neon_asm = 'aom_lpf_horizontal_8_dual_neon';

add_proto qw/void aom_lpf_horizontal_4/, $lpf_args;
specialize qw/aom_lpf_horizontal_4 sse2 neon dspr2 msa/;

add_proto qw/void aom_lpf_horizontal_4_dual/, $lpf_two_set_args;
specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;

# High bit depth loop filters: the same set as the 8-bit filters, operating on
# uint16_t pixels with a trailing bit depth argument.  Only sse2 variants are
# listed.
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void aom_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_16 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_16_dual sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_8 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_8_dual sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_vertical_4 sse2/;

  add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_vertical_4_dual sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_edge_8 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_edge_16 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_8 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_8_dual sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/aom_highbd_lpf_horizontal_4 sse2/;

  add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/aom_highbd_lpf_horizontal_4_dual sse2/;
}  # CONFIG_HIGHBITDEPTH

#
# Encoder functions.
#

#
# Forward transform
#
# Declared when either CONFIG_AV1_ENCODER or CONFIG_PVQ is "yes".  Both
# branches declare the same aom_fdct* prototypes; they differ in which
# specializations are listed, and the high bit depth branch additionally
# declares the aom_highbd_fdct* functions.
if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq "yes")){
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4 sse2/;

    add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4_1 sse2/;

    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8 sse2/, "$ssse3_x86_64";

    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16 sse2/;

    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32 sse2 avx2/;

    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_rd sse2 avx2/;

    # High bit depth
    add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct4x4 sse2/;

    add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct8x8 sse2/;

    add_proto qw/void aom_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct16x16 sse2/;

    add_proto qw/void aom_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32 sse2/;

    add_proto qw/void aom_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_highbd_fdct32x32_rd sse2/;

  } else {
    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4 sse2 msa/;

    add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct4x4_1 sse2/;

    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";

    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct16x16 sse2 msa/;

    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32 sse2 avx2 msa/;

    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
    specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
  }  # CONFIG_HIGHBITDEPTH
}  # CONFIG_AV1_ENCODER || CONFIG_PVQ

#
# Inverse transform
if (aom_config("CONFIG_AV1") eq "yes") {
  add_proto qw/void aom_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";

  add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/aom_iwht4x4_16_add sse2/;

  add_proto qw/void aom_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

  add_proto qw/void aom_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";

415
  {
Yaowu Xu's avatar
Yaowu Xu committed
416 417 418 419 420 421 422
    add_proto qw/void aom_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct4x4_16_add sse2/;

    add_proto qw/void aom_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct4x4_1_add sse2/;

    add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
423
    specialize qw/aom_idct8x8_64_add sse2 ssse3/;
Yaowu Xu's avatar
Yaowu Xu committed
424 425

    add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
426
    specialize qw/aom_idct8x8_12_add sse2 ssse3/;
Yaowu Xu's avatar
Yaowu Xu committed
427 428 429 430 431

    add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/aom_idct8x8_1_add sse2/;

    add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
432
    specialize qw/aom_idct16x16_256_add sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
433

434
    add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
435
    specialize qw/aom_idct16x16_38_add avx2/;
436

Yaowu Xu's avatar
Yaowu Xu committed
437
    add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
438
    specialize qw/aom_idct16x16_10_add sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
439 440

    add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
441
    specialize qw/aom_idct16x16_1_add sse2 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
442 443

    add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
444
    specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
445 446

    add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
447
    specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
448 449 450 451
    # TODO: add dedicated 135 eob idct32x32 implementations. Until one exists the
    # sse2 entry is pointed at the full 1024 routine.
    $aom_idct32x32_135_add_sse2 = 'aom_idct32x32_1024_add_sse2';

    add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
452
    specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2/;
Yaowu Xu's avatar
Yaowu Xu committed
453 454

    add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
455
    specialize qw/aom_idct32x32_1_add sse2 avx2/;
456
  }
457
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
458
} else {
459
  {
Yaowu Xu's avatar
Yaowu Xu committed
460 461 462 463 464 465 466 467 468 469
    # These three idct entry points share one argument list and one set of
    # specializations (sse2, neon, dspr2, msa), so they are declared in a loop.
    foreach my $idct_fn (qw/aom_idct4x4_1_add aom_idct4x4_16_add aom_idct8x8_1_add/) {
      add_proto 'void', $idct_fn, "const tran_low_t *input, uint8_t *dest, int dest_stride";
      specialize $idct_fn, qw/sse2 neon dspr2 msa/;
    }

    add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
470
    specialize qw/aom_idct8x8_64_add sse2 ssse3 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
471 472

    add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
473
    specialize qw/aom_idct8x8_12_add sse2 ssse3 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
474 475

    add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
476
    specialize qw/aom_idct16x16_1_add sse2 avx2 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
477 478

    add_proto qw/void aom_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
479
    specialize qw/aom_idct16x16_256_add sse2 avx2 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
480

481
    add_proto qw/void aom_idct16x16_38_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
482
    specialize qw/aom_idct16x16_38_add avx2/;
483

Yaowu Xu's avatar
Yaowu Xu committed
484
    add_proto qw/void aom_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
485
    specialize qw/aom_idct16x16_10_add sse2 avx2 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
486 487

    add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
488
    specialize qw/aom_idct32x32_1024_add sse2 ssse3 avx2 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
489 490

    add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
491
    specialize qw/aom_idct32x32_135_add sse2 ssse3 avx2 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
492 493 494 495 496 497 498
    # TODO: add dedicated 135 eob idct32x32 implementations. Until they exist,
    # each target listed below is pointed at its full 1024 routine.
    $aom_idct32x32_135_add_sse2  = 'aom_idct32x32_1024_add_sse2';
    $aom_idct32x32_135_add_neon  = 'aom_idct32x32_1024_add_neon';
    $aom_idct32x32_135_add_dspr2 = 'aom_idct32x32_1024_add_dspr2';
    $aom_idct32x32_135_add_msa   = 'aom_idct32x32_1024_add_msa';

    add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
499
    specialize qw/aom_idct32x32_34_add sse2 ssse3 avx2 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
500 501 502 503
    # TODO: add a dedicated 34 eob idct32x32 neon implementation. Until one
    # exists the neon entry is pointed at the full 1024 routine.
    $aom_idct32x32_34_add_neon = 'aom_idct32x32_1024_add_neon';

    add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
Yi Luo's avatar
Yi Luo committed
504
    specialize qw/aom_idct32x32_1_add sse2 avx2 neon dspr2 msa/;
Yaowu Xu's avatar
Yaowu Xu committed
505 506 507 508 509 510

    # iwht4x4 prototypes. aom_iwht4x4_1_add is specialized for msa only;
    # aom_iwht4x4_16_add is specialized for msa and sse2.
    add_proto('void', 'aom_iwht4x4_1_add', "const tran_low_t *input, uint8_t *dest, int dest_stride");
    specialize('aom_iwht4x4_1_add', 'msa');

    add_proto('void', 'aom_iwht4x4_16_add', "const tran_low_t *input, uint8_t *dest, int dest_stride");
    specialize('aom_iwht4x4_16_add', 'msa', 'sse2');
511
  }
512
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
513 514 515 516 517 518 519 520 521 522
}  # CONFIG_AV1

#
# Quantization
#
if (aom_config("CONFIG_AOM_QM") eq "yes") {
  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";

    add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
523 524

    add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
Yaowu Xu's avatar
Yaowu Xu committed
525

526 527 528
    add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";

    add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
Yaowu Xu's avatar
Yaowu Xu committed
529

530
    add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
531

Yaowu Xu's avatar
Yaowu Xu committed
532 533 534 535 536 537 538 539 540
  }  # CONFIG_AV1_ENCODER
} else {
  if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
    # aom_quantize_b and aom_quantize_b_32x32 take the same argument list.
    # Both get the ssse3/avx specializations named by $ssse3_x86_64 and
    # $avx_x86_64 (set near the top of this file, non-empty only when
    # $opts{arch} is "x86_64"); only aom_quantize_b also gets sse2.
    foreach my $quant_spec (['aom_quantize_b', 'sse2'], ['aom_quantize_b_32x32']) {
      my ($quant_fn, @always_isa) = @{$quant_spec};
      add_proto 'void', $quant_fn, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
      specialize $quant_fn, @always_isa, "$ssse3_x86_64", "$avx_x86_64";
    }

541 542
    add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

543 544 545 546 547
    # aom_highbd_quantize_b (sse2, avx2) and aom_highbd_quantize_b_32x32 (sse2)
    # share one argument list.
    foreach my $hbd_quant_spec ([qw/aom_highbd_quantize_b sse2 avx2/], [qw/aom_highbd_quantize_b_32x32 sse2/]) {
      my ($hbd_quant_fn, @isa) = @{$hbd_quant_spec};
      add_proto 'void', $hbd_quant_fn, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
      specialize $hbd_quant_fn, @isa;
    }
Yaowu Xu's avatar
Yaowu Xu committed
548

549
    add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
550

Yaowu Xu's avatar
Yaowu Xu committed
551 552 553 554 555 556 557 558 559 560 561 562 563
  }  # CONFIG_AV1_ENCODER
} # CONFIG_AOM_QM
if (aom_config("CONFIG_AV1") eq "yes") {
  #
  # Alpha blending with mask
  #
  # a64 blend prototypes. The hmask/vmask forms take the same arguments as each
  # other; the mask form additionally takes mask_stride, suby and subx.
  add_proto 'void', 'aom_blend_a64_mask', "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
  add_proto 'void', 'aom_blend_a64_hmask', "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
  add_proto 'void', 'aom_blend_a64_vmask', "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
  # All three get an sse4_1 specialization.
  foreach my $blend_fn (qw/aom_blend_a64_mask aom_blend_a64_hmask aom_blend_a64_vmask/) {
    specialize $blend_fn, 'sse4_1';
  }

564
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
565 566 567 568 569 570 571 572 573
    # aom_highbd_ a64 blend prototypes; each takes a trailing "int bd" argument.
    add_proto 'void', 'aom_highbd_blend_a64_mask', "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd";
    add_proto 'void', 'aom_highbd_blend_a64_hmask', "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
    add_proto 'void', 'aom_highbd_blend_a64_vmask', "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd";
    # All three get an sse4_1 specialization.
    foreach my $hbd_blend_fn (qw/aom_highbd_blend_a64_mask aom_highbd_blend_a64_hmask aom_highbd_blend_a64_vmask/) {
      specialize $hbd_blend_fn, 'sse4_1';
    }
  }
}  # CONFIG_AV1

574
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
575 576 577 578 579 580 581 582 583 584
#
# Block subtraction
#
# Declares aom_subtract_block and its neon, msa and sse2 specializations.
add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/aom_subtract_block neon msa sse2/;

# NOTE(review): this section already appears to sit inside an enclosing
# CONFIG_AV1_ENCODER check, so this nested test of the same option looks
# redundant -- confirm the brace structure before simplifying.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
# Sum of Squares
#
585
add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
Yaowu Xu's avatar
Yaowu Xu committed
586 587 588 589 590 591 592 593 594 595
specialize qw/aom_sum_squares_2d_i16 sse2/;

add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
specialize qw/aom_sum_squares_i16 sse2/;
}


#
# Avg
#
596
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
597 598 599 600
  #
  # Avg
  #
  # NOTE(review): no add_proto for aom_avg_8x8 appears next to this specialize
  # call -- confirm a prototype is declared elsewhere; otherwise this line may
  # be a leftover.
  specialize qw/aom_avg_8x8 sse2 neon msa/;
601
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
602 603 604 605 606 607 608 609 610
    add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
    specialize qw/aom_highbd_subtract_block sse2/;
  }

  #
  # Minmax
  #
  add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
  specialize qw/aom_minmax_8x8 sse2 neon/;
611
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
612 613 614 615 616 617 618 619 620 621 622 623
    add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
  }

  # Hadamard prototypes: both sizes share one argument list. The 8x8 form also
  # gets whatever $ssse3_x86_64 names (non-empty only when $opts{arch} is
  # "x86_64", per the top of this file).
  add_proto('void', 'aom_hadamard_8x8', "const int16_t *src_diff, int src_stride, int16_t *coeff");
  specialize('aom_hadamard_8x8', 'sse2', 'neon', "$ssse3_x86_64");

  add_proto('void', 'aom_hadamard_16x16', "const int16_t *src_diff, int src_stride, int16_t *coeff");
  specialize('aom_hadamard_16x16', 'sse2', 'neon');

  # aom_satd returns int and is specialized for sse2 and neon.
  add_proto('int', 'aom_satd', "const int16_t *coeff, int length");
  specialize('aom_satd', 'sse2', 'neon');

624
  add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, int ref_stride, int height";
Yaowu Xu's avatar
Yaowu Xu committed
625 626
  specialize qw/aom_int_pro_row sse2 neon/;

627
  add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, int width";
Yaowu Xu's avatar
Yaowu Xu committed
628 629
  specialize qw/aom_int_pro_col sse2 neon/;

630
  add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
Yaowu Xu's avatar
Yaowu Xu committed
631 632 633 634 635 636 637 638 639 640 641 642
  specialize qw/aom_vector_var neon sse2/;
}  # CONFIG_AV1_ENCODER

#
# Single block SAD / Single block Avg SAD
#
# Declares one plain and one "_avg" prototype for every [width, height] pair
# in @block_sizes. The "_avg" form takes an extra second_pred argument.
foreach my $dims (@block_sizes) {
  ($w, $h) = @{$dims};  # $w/$h are kept as file-level variables
  my $sad_fn = "aom_sad${w}x${h}";
  add_proto 'unsigned', 'int', $sad_fn, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
  add_proto 'unsigned', 'int', "${sad_fn}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
}

643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658
# Specializations for the single-block SAD functions, one row per block size:
# the function name followed by the variants registered for it.
foreach my $sad_row (
  [qw/aom_sad128x128    avx2          sse2/],
  [qw/aom_sad128x64     avx2          sse2/],
  [qw/aom_sad64x128     avx2          sse2/],
  [qw/aom_sad64x64      avx2 neon msa sse2/],
  [qw/aom_sad64x32      avx2      msa sse2/],
  [qw/aom_sad32x64      avx2      msa sse2/],
  [qw/aom_sad32x32      avx2 neon msa sse2/],
  [qw/aom_sad32x16      avx2      msa sse2/],
  [qw/aom_sad16x32                msa sse2/],
  [qw/aom_sad16x16           neon msa sse2/],
  [qw/aom_sad16x8            neon msa sse2/],
  [qw/aom_sad8x16            neon msa sse2/],
  [qw/aom_sad8x8             neon msa sse2/],
  [qw/aom_sad8x4                  msa sse2/],
  [qw/aom_sad4x8                  msa sse2/],
  [qw/aom_sad4x4             neon msa sse2/],
) {
  specialize @{$sad_row};
}
Yaowu Xu's avatar
Yaowu Xu committed
659

660 661 662
specialize qw/aom_sad128x128_avg avx2     sse2/;
specialize qw/aom_sad128x64_avg  avx2     sse2/;
specialize qw/aom_sad64x128_avg  avx2     sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
663 664 665 666
specialize qw/aom_sad64x64_avg   avx2 msa sse2/;
specialize qw/aom_sad64x32_avg   avx2 msa sse2/;
specialize qw/aom_sad32x64_avg   avx2 msa sse2/;
specialize qw/aom_sad32x32_avg   avx2 msa sse2/;
667
specialize qw/aom_sad32x16_avg   avx2 msa sse2/;
Yaowu Xu's avatar
Yaowu Xu committed
668 669 670 671 672 673 674 675 676
# Each "_avg" SAD size listed here gets the same two specializations, msa and
# sse2.
foreach my $avg_size (qw/16x32 16x16 16x8 8x16 8x8 8x4 4x8 4x4/) {
  specialize "aom_sad${avg_size}_avg", qw/msa sse2/;
}

677
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
678 679 680 681 682 683 684 685 686
  # Declare aom_highbd_sadWxH and aom_highbd_sadWxH_avg for every pair in
  # @block_sizes. The sse2 specialization is registered for every size except
  # those with a 128 dimension or a width of 4.
  foreach my $dims (@block_sizes) {
    ($w, $h) = @{$dims};  # $w/$h are kept as file-level variables
    my $hbd_sad_fn = "aom_highbd_sad${w}x${h}";
    add_proto 'unsigned', 'int', $hbd_sad_fn, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
    add_proto 'unsigned', 'int', "${hbd_sad_fn}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
    next if $w == 128 || $h == 128 || $w == 4;
    specialize $hbd_sad_fn, 'sse2';
    specialize "${hbd_sad_fn}_avg", 'sse2';
  }
687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709
  # avx2 specializations for the aom_highbd_sad functions: first the plain
  # versions, then the "_avg" versions, over the same list of block sizes.
  foreach my $suffix ('', '_avg') {
    foreach my $size (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
      specialize "aom_highbd_sad${size}${suffix}", 'avx2';
    }
  }
Yaowu Xu's avatar
Yaowu Xu committed
710 711 712 713 714 715 716 717
}

#
# Masked SAD
#
if (aom_config("CONFIG_EXT_INTER") eq "yes") {
  foreach (@block_sizes) {
    ($w, $h) = @$_;
David Barker's avatar
David Barker committed
718
    add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
719
    specialize "aom_masked_sad${w}x${h}", qw/ssse3/;
Yaowu Xu's avatar
Yaowu Xu committed
720 721
  }

722
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
723 724
    foreach (@block_sizes) {
      ($w, $h) = @$_;
David Barker's avatar
David Barker committed
725
      add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
726
      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3/;
Yaowu Xu's avatar
Yaowu Xu committed
727 728 729 730 731 732 733
    }
  }
}

#
# OBMC SAD
#
Yue Chen's avatar
Yue Chen committed
734
if (aom_config("CONFIG_MOTION_VAR") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
735 736 737 738 739 740
  # One aom_obmc_sadWxH prototype, with an sse4_1 specialization, for every
  # pair in @block_sizes.
  foreach my $dims (@block_sizes) {
    ($w, $h) = @{$dims};  # $w/$h are kept as file-level variables
    my $obmc_fn = "aom_obmc_sad${w}x${h}";
    add_proto 'unsigned', 'int', $obmc_fn, "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
    specialize $obmc_fn, 'sse4_1';
  }

741
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786
    foreach (@block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
      specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
    }
  }
}

#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
#
# Every function in this section takes the same argument list; only the block
# size and the x3/x8 suffix differ.

# Blocks of 3: square sizes from @block_widths.
foreach my $dim (@block_widths) {
  add_proto 'void', "aom_sad${dim}x${dim}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
}
foreach my $x3_row (
  [qw/aom_sad64x64x3            msa/],
  [qw/aom_sad32x32x3            msa/],
  [qw/aom_sad16x16x3 sse3 ssse3 msa/],
  [qw/aom_sad8x8x3   sse3       msa/],
  [qw/aom_sad4x4x3   sse3       msa/],
) {
  specialize @{$x3_row};
}

# Blocks of 3: non-square sizes, declared and specialized one at a time.
foreach my $x3_row (
  [qw/aom_sad16x8x3 sse3 ssse3 msa/],
  [qw/aom_sad8x16x3 sse3       msa/],
) {
  my ($x3_fn, @isa) = @{$x3_row};
  add_proto 'void', $x3_fn, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize $x3_fn, @isa;
}

# Blocks of 8: square sizes from @block_widths.
foreach my $dim (@block_widths) {
  add_proto 'void', "aom_sad${dim}x${dim}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
}
foreach my $x8_row (
  [qw/aom_sad64x64x8        msa/],
  [qw/aom_sad32x32x8        msa/],
  [qw/aom_sad16x16x8 sse4_1 msa/],
  [qw/aom_sad8x8x8   sse4_1 msa/],
  [qw/aom_sad4x4x8   sse4_1 msa/],
) {
  specialize @{$x8_row};
}

# Blocks of 8: non-square sizes, declared and specialized one at a time.
foreach my $x8_row (
  [qw/aom_sad16x8x8 sse4_1 msa/],
  [qw/aom_sad8x16x8 sse4_1 msa/],
  [qw/aom_sad8x4x8         msa/],
  [qw/aom_sad4x8x8         msa/],
) {
  my ($x8_fn, @isa) = @{$x8_row};
  add_proto 'void', $x8_fn, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  specialize $x8_fn, @isa;
}
787
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
  # aom_highbd_sad x3/x8 prototypes; no specializations are registered for
  # them in this block.
  foreach my $dim (@block_widths) {
    # Blocks of 3, then blocks of 8, for each square size.
    foreach my $count (3, 8) {
      add_proto 'void', "aom_highbd_sad${dim}x${dim}x${count}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
    }
  }
  # Non-square sizes: the two blocks-of-3 entries first, then the four
  # blocks-of-8 entries.
  foreach my $tag (qw/16x8x3 8x16x3 16x8x8 8x16x8 8x4x8 4x8x8/) {
    add_proto 'void', "aom_highbd_sad${tag}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";
  }
}

#
# Multi-block SAD, comparing a reference to N independent blocks
#
# One aom_sadWxHx4d prototype per pair in @block_sizes; ref_ptr is an array of
# reference pointers rather than a single pointer.
foreach my $dims (@block_sizes) {
  ($w, $h) = @{$dims};  # $w/$h are kept as file-level variables
  my $x4d_fn = "aom_sad${w}x${h}x4d";
  add_proto 'void', $x4d_fn, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
}

812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827
# Specializations for the x4d SAD functions, one row per block size: the
# function name followed by the variants registered for it.
foreach my $x4d_row (
  [qw/aom_sad128x128x4d avx2          sse2/],
  [qw/aom_sad128x64x4d  avx2          sse2/],
  [qw/aom_sad64x128x4d  avx2          sse2/],
  [qw/aom_sad64x64x4d   avx2 neon msa sse2/],
  [qw/aom_sad64x32x4d   avx2      msa sse2/],
  [qw/aom_sad32x64x4d   avx2      msa sse2/],
  [qw/aom_sad32x32x4d   avx2 neon msa sse2/],
  [qw/aom_sad32x16x4d             msa sse2/],
  [qw/aom_sad16x32x4d             msa sse2/],
  [qw/aom_sad16x16x4d        neon msa sse2/],
  [qw/aom_sad16x8x4d              msa sse2/],
  [qw/aom_sad8x16x4d              msa sse2/],
  [qw/aom_sad8x8x4d               msa sse2/],
  [qw/aom_sad8x4x4d               msa sse2/],
  [qw/aom_sad4x8x4d               msa sse2/],
  [qw/aom_sad4x4x4d               msa sse2/],
) {
  specialize @{$x4d_row};
}
Yaowu Xu's avatar
Yaowu Xu committed
828

829
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
830 831 832 833 834 835 836 837 838 839
  #
  # Multi-block SAD, comparing a reference to N independent blocks
  #
  # One aom_highbd_sadWxHx4d prototype per pair in @block_sizes; the sse2
  # specialization is skipped for any size with a 128 dimension.
  foreach my $dims (@block_sizes) {
    ($w, $h) = @{$dims};  # $w/$h are kept as file-level variables
    my $hbd_x4d_fn = "aom_highbd_sad${w}x${h}x4d";
    add_proto 'void', $hbd_x4d_fn, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
    next if $w == 128 || $h == 128;
    specialize $hbd_x4d_fn, 'sse2';
  }
840 841 842 843 844 845 846 847 848 849 850
  # avx2 specializations for the aom_highbd_sad x4d functions.
  foreach my $size (qw/128x128 128x64 64x128 64x64 64x32 32x64 32x32 32x16 16x32 16x16 16x8/) {
    specialize "aom_highbd_sad${size}x4d", 'avx2';
  }
Yaowu Xu's avatar
Yaowu Xu committed
851 852 853 854 855 856 857 858 859 860 861 862
}

#
# Structured Similarity (SSIM)
#
# These prototypes are only declared when CONFIG_INTERNAL_STATS is "yes".
if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
  add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
  # $sse2_x86_64 is set near the top of this file: it is 'sse2' only when
  # $opts{arch} is "x86_64" and empty otherwise.
  specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";

  # Same argument list as the 8x8 form, and the same x86_64-gated
  # specialization.
  add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
  specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64";

863
  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
864 865 866
    add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
  }
}
867
}  # CONFIG_AV1_ENCODER
Yaowu Xu's avatar
Yaowu Xu committed
868

869
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886

#
# Specialty Variance
#
# aom_get16x16var and aom_get8x8var share one argument list: they return void
# and write through the sse and sum pointers.
foreach my $get_var_fn (qw/aom_get16x16var aom_get8x8var/) {
  add_proto 'void', $get_var_fn, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
}

specialize('aom_get16x16var', qw/sse2 avx2 neon msa/);
specialize('aom_get8x8var', qw/sse2 neon msa/);

# aom_mse prototypes, one per listed block size, all with the same argument
# list.
foreach my $mse_size (qw/16x16 16x8 8x16 8x8/) {
  add_proto 'unsigned', 'int', "aom_mse${mse_size}", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
}

887 888 889 890
specialize qw/aom_mse16x16          sse2 avx2 neon msa/;
specialize qw/aom_mse16x8           sse2           msa/;
specialize qw/aom_mse8x16           sse2           msa/;
specialize qw/aom_mse8x8            sse2           msa/;
Yaowu Xu's avatar
Yaowu Xu committed
891

892
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909
  # For each of the 8, 10 and 12 variants, declare the aom_highbd_<n>_ var/mse
  # prototypes. Only mse16x16 and mse8x8 get an sse2 specialization.
  foreach my $depth (8, 10, 12) {
    my $prefix = "aom_highbd_${depth}_";

    foreach my $get_var (qw/get16x16var get8x8var/) {
      add_proto 'void', "${prefix}${get_var}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
    }

    foreach my $mse (qw/mse16x16 mse16x8 mse8x16 mse8x8/) {
      add_proto 'unsigned', 'int', "${prefix}${mse}", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
    }

    foreach my $mse (qw/mse16x16 mse8x8/) {
      specialize "${prefix}${mse}", 'sse2';
    }
  }
}

#
# ...
#
910
add_proto qw/void aom_upsampled_pred/, "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
Yaowu Xu's avatar
Yaowu Xu committed
911
specialize qw/aom_upsampled_pred sse2/;
912
add_proto qw/void aom_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
Yaowu Xu's avatar
Yaowu Xu committed
913 914
specialize qw/aom_comp_avg_upsampled_pred sse2/;

915
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
916
  add_proto qw/void aom_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
Yaowu Xu's avatar
Yaowu Xu committed
917
  specialize qw/aom_highbd_upsampled_pred sse2/;
918
  add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
Yaowu Xu's avatar
Yaowu Xu committed
919 920 921 922 923 924 925 926 927 928 929 930 931 932 933
  specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
}

#
# aom_get_mb_ss / aom_get4x4sse_cs
#
# Both return unsigned int. aom_get_mb_ss takes a single unnamed int16_t
# pointer; aom_get4x4sse_cs takes a source and a reference pointer with their
# strides.
add_proto('unsigned', 'int', 'aom_get_mb_ss', "const int16_t *");
add_proto('unsigned', 'int', 'aom_get4x4sse_cs', "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride");

specialize('aom_get_mb_ss', 'sse2', 'msa');
specialize('aom_get4x4sse_cs', 'neon', 'msa');

#
# Variance / Subpixel Variance / Subpixel Avg Variance
#
Jingning Han's avatar
Jingning Han committed
934 935
  # Variance prototypes for the 2x2, 2x4 and 4x2 sizes. No specializations
  # are registered for them in this block.
  foreach my $tiny (qw/2x2 2x4 4x2/) {
    add_proto qw/unsigned int/, "aom_variance${tiny}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  }


Yaowu Xu's avatar
Yaowu Xu committed
940 941 942 943 944 945 946
# For every [width, height] pair in @block_sizes, declare the variance,
# sub-pixel variance and sub-pixel average variance prototypes.
foreach (@block_sizes) {
  ($w, $h) = @$_;
  my $size = "${w}x${h}";

  add_proto qw/unsigned int/, "aom_variance$size",
            "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
  add_proto qw/uint32_t/, "aom_sub_pixel_variance$size",
            "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
  add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance$size",
            "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
}

947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973
# Specializations for the plain variance functions, largest block first.
specialize "aom_variance64x64", qw/sse2 avx2 neon msa/;
specialize "aom_variance64x32", qw/sse2 avx2 neon msa/;
specialize "aom_variance32x64", qw/sse2 neon msa/;
specialize "aom_variance32x32", qw/sse2 avx2 neon msa/;
specialize "aom_variance32x16", qw/sse2 avx2 msa/;
specialize "aom_variance16x32", qw/sse2 msa/;
specialize "aom_variance16x16", qw/sse2 avx2 neon msa/;
specialize "aom_variance16x8",  qw/sse2 neon msa/;
specialize "aom_variance8x16",  qw/sse2 neon msa/;
specialize "aom_variance8x8",   qw/sse2 neon msa/;
specialize "aom_variance8x4",   qw/sse2 msa/;
specialize "aom_variance4x8",   qw/sse2 msa/;
specialize "aom_variance4x4",   qw/sse2 msa/;

# Specializations for the sub-pixel variance functions, largest block first.
specialize "aom_sub_pixel_variance64x64", qw/avx2 neon msa sse2 ssse3/;
specialize "aom_sub_pixel_variance64x32", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_variance32x64", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_variance32x32", qw/avx2 neon msa sse2 ssse3/;
specialize "aom_sub_pixel_variance32x16", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_variance16x32", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_variance16x16", qw/neon msa sse2 ssse3/;
specialize "aom_sub_pixel_variance16x8",  qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_variance8x16",  qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_variance8x8",   qw/neon msa sse2 ssse3/;
specialize "aom_sub_pixel_variance8x4",   qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_variance4x8",   qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_variance4x4",   qw/msa sse2 ssse3/;
Yaowu Xu's avatar
Yaowu Xu committed
974 975 976 977 978 979 980 981 982 983 984 985 986 987

# Specializations for the sub-pixel average variance functions, largest
# block first.
specialize "aom_sub_pixel_avg_variance64x64", qw/avx2 msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance64x32", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance32x64", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance32x32", qw/avx2 msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance32x16", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance16x32", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance16x16", qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance16x8",  qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance8x16",  qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance8x8",   qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance8x4",   qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance4x8",   qw/msa sse2 ssse3/;
specialize "aom_sub_pixel_avg_variance4x4",   qw/msa sse2 ssse3/;
988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003

# Specializations for the 4x16, 16x4, 8x32 and 32x8 block sizes.
# The flag is compared against "yes" like the other aom_config() checks in
# this file. A bare truth test would also take this branch for any other
# non-empty value (for example "no"), because every non-empty string other
# than "0" is true in Perl.
if (aom_config("CONFIG_EXT_PARTITION_TYPES") eq "yes") {
  specialize qw/aom_variance4x16 sse2/;
  specialize qw/aom_variance16x4 sse2/;
  specialize qw/aom_variance8x32 sse2/;
  specialize qw/aom_variance32x8 sse2/;
  specialize qw/aom_sub_pixel_variance4x16 sse2 ssse3/;
  specialize qw/aom_sub_pixel_variance16x4 sse2 ssse3/;
  specialize qw/aom_sub_pixel_variance8x32 sse2 ssse3/;
  specialize qw/aom_sub_pixel_variance32x8 sse2 ssse3/;
  specialize qw/aom_sub_pixel_avg_variance4x16 sse2 ssse3/;
  specialize qw/aom_sub_pixel_avg_variance16x4 sse2 ssse3/;
  specialize qw/aom_sub_pixel_avg_variance8x32 sse2 ssse3/;
  specialize qw/aom_sub_pixel_avg_variance32x8 sse2 ssse3/;
}

1004
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
  # One full set of variance prototypes per supported bit depth.
  foreach $bd (8, 10, 12) {
    # 2x2, 2x4 and 4x2 prototypes; no specializations are registered for them.
    foreach my $tiny (qw/2x2 2x4 4x2/) {
      add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${tiny}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
    }

    foreach (@block_sizes) {
      ($w, $h) = @$_;
      my $size = "${w}x${h}";
      my $var_fn = "aom_highbd_${bd}_variance$size";
      my $subpel_fn = "aom_highbd_${bd}_sub_pixel_variance$size";
      my $subpel_avg_fn = "aom_highbd_${bd}_sub_pixel_avg_variance$size";

      add_proto qw/unsigned int/, $var_fn, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
      add_proto qw/uint32_t/, $subpel_fn, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
      add_proto qw/uint32_t/, $subpel_avg_fn, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";

      my $no_128_side = ($w != 128 && $h != 128);

      # TODO(david.barker): When ext-partition-types is enabled, we currently
      # don't have vectorized 4x16 highbd variance functions
      if ($w == 4 && $h == 4) {
        # 4x4 is the only size that gets sse4_1, for all three functions.
        specialize $var_fn, "sse4_1";
        specialize $subpel_fn, "sse4_1";
        specialize $subpel_avg_fn, "sse4_1";
      } else {
        # Plain variance: sse2 only when neither side is 4 or 128.
        if ($no_128_side && $w != 4 && $h != 4) {
          specialize $var_fn, "sse2";
        }
        # Sub-pixel variants: sse2 unless the width is 4 or a side is 128.
        if ($no_128_side && $w != 4) {
          specialize $subpel_fn, qw/sse2/;
          specialize $subpel_avg_fn, qw/sse2/;
        }
      }
    }
  }
}  # CONFIG_HIGHBITDEPTH
Yaowu Xu's avatar
Yaowu Xu committed
1036 1037 1038 1039 1040 1041 1042

if (aom_config("CONFIG_EXT_INTER") eq "yes") {
#
# Masked Variance / Masked Subpixel Variance
#
  # Argument list shared by the 8-bit and high bit-depth prototypes.
  my $masked_args = "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";

  foreach (@block_sizes) {
    ($w, $h) = @$_;
    my $fn = "aom_masked_sub_pixel_variance${w}x${h}";
    add_proto qw/unsigned int/, $fn, $masked_args;
    specialize $fn, qw/ssse3/;
  }

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    # $bd carries its surrounding underscores so it can be spliced straight
    # into the function name.
    foreach $bd ("_8_", "_10_", "_12_") {
      foreach (@block_sizes) {
        ($w, $h) = @$_;
        my $fn = "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}";
        add_proto qw/unsigned int/, $fn, $masked_args;
        specialize $fn, qw/ssse3/;
      }
    }
  }
}

#
# OBMC Variance / OBMC Subpixel Variance
#
Yue Chen's avatar
Yue Chen committed
1061
if (aom_config("CONFIG_MOTION_VAR") eq "yes") {
  # Argument lists shared by the 8-bit and high bit-depth prototypes.
  my $obmc_args = "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
  my $obmc_subpel_args = "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";

  foreach (@block_sizes) {
    ($w, $h) = @$_;
    my $size = "${w}x${h}";
    add_proto qw/unsigned int/, "aom_obmc_variance$size", $obmc_args;
    add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance$size", $obmc_subpel_args;
    # Only the whole-pixel variant is specialized.
    specialize "aom_obmc_variance$size", qw/sse4_1/;
  }

  if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
    # The first infix is a bare "_", so those names carry no bit-depth number
    # (aom_highbd_obmc_variance...), unlike the "_10_" and "_12_" ones.
    foreach $bd ("_", "_10_", "_12_") {
      foreach (@block_sizes) {
        ($w, $h) = @$_;
        my $size = "${w}x${h}";
        add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance$size", $obmc_args;
        add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance$size", $obmc_subpel_args;
        specialize "aom_highbd${bd}obmc_variance$size", qw/sse4_1/;
      }
    }
  }
}

# Sub-pixel average variance: prototype plus specializations for each size.
# NOTE(review): the same names are already declared by the @block_sizes loop
# and specialized with identical lists earlier in this file, so this section
# looks redundant. Confirm before removing it.
foreach my $entry (
  [qw/64x64 avx2 msa sse2 ssse3/],
  [qw/64x32      msa sse2 ssse3/],
  [qw/32x64      msa sse2 ssse3/],
  [qw/32x32 avx2 msa sse2 ssse3/],
  [qw/32x16      msa sse2 ssse3/],
  [qw/16x32      msa sse2 ssse3/],
  [qw/16x16      msa sse2 ssse3/],
  [qw/16x8       msa sse2 ssse3/],
  [qw/8x16       msa sse2 ssse3/],
  [qw/8x8        msa sse2 ssse3/],
  [qw/8x4        msa sse2 ssse3/],
  [qw/4x8        msa sse2 ssse3/],
  [qw/4x4        msa sse2 ssse3/],
) {
  # First element is the block size; the rest are the specialization names.
  my ($size, @isas) = @$entry;
  add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance$size", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
  specialize "aom_sub_pixel_avg_variance$size", @isas;
}

#
# Specialty Subpixel
#
# The three 16x16 half-pixel variance functions (suffixes _h, _v, _hv) share
# one argument list and are each specialized for sse2.
foreach my $dir (qw/h v hv/) {
  add_proto qw/uint32_t/, "aom_variance_halfpixvar16x16_$dir", "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int  ref_stride, uint32_t *sse";
  specialize "aom_variance_halfpixvar16x16_$dir", qw/sse2/;
}
Yaowu Xu's avatar
Yaowu Xu committed
1131 1132 1133 1134 1135

#
# Comp Avg
#
# Prototype only; no specialization is registered on this line.
add_proto qw/void/, "aom_comp_avg_pred", "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
1136
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
Yaowu Xu's avatar
Yaowu Xu committed
1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154