# Prints the C text held in the here-document below (a banner comment, the
# #include lines and the forward struct/union declarations) to the currently
# selected output handle. The sub is registered by name through forward_decls
# immediately after its definition.
sub vp9_common_forward_decls() {
print <<EOF
/*
 * VP9
 */

#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"

struct macroblockd;

/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
EOF
}
forward_decls qw/vp9_common_forward_decls/;

# x86inc.asm has specific constraints, so the variables that select
# x86inc-based variants are kept separate; that makes them easy to disable.
# Every variable starts out as '' so no else branches are needed below.
#
# With CONFIG_USE_X86INC set to "yes" each $<isa>_x86inc variable holds the
# name of its instruction set. The $<isa>_x86_64_x86inc variables are filled in
# only when, in addition, $opts{arch} is "x86_64".
$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
  $avx2_x86inc = '';
$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
  $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
  $mmx_x86inc = 'mmx';
  $sse_x86inc = 'sse';
  $sse2_x86inc = 'sse2';
  $ssse3_x86inc = 'ssse3';
  $avx_x86inc = 'avx';
  $avx2_x86inc = 'avx2';
  if ($opts{arch} eq "x86_64") {
    $mmx_x86_64_x86inc = 'mmx';
    $sse_x86_64_x86inc = 'sse';
    $sse2_x86_64_x86inc = 'sse2';
    $ssse3_x86_64_x86inc = 'ssse3';
    $avx_x86_64_x86inc = 'avx';
    $avx2_x86_64_x86inc = 'avx2';
  }
}

# Functions that are 64 bit only: these variables stay '' unless $opts{arch}
# is "x86_64", in which case each one holds the name of its instruction set.
$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
if ($opts{arch} eq "x86_64") {
  $mmx_x86_64 = 'mmx';
  $sse2_x86_64 = 'sse2';
  $ssse3_x86_64 = 'ssse3';
  $avx_x86_64 = 'avx';
  $avx2_x86_64 = 'avx2';
}

# Optimizations which depend on multiple features: $avx2_ssse3 is 'avx2' only
# when both HAVE_AVX2 and HAVE_SSSE3 are "yes", and '' otherwise.
$avx2_ssse3 = '';
if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
  $avx2_ssse3 = 'avx2';
}

#
# RECON
#
# 4x4 intra predictors. Each entry is an add_proto call (return type, function
# name, argument list) followed by a specialize call listing the variants for
# that function. A trailing "$..._x86inc" argument is '' unless
# CONFIG_USE_X86INC is "yes" (see the variable setup near the top of the file).
add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_4x4 neon/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";

add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_4x4/;

add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_4x4 neon/;

add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_4x4 neon msa/, "$sse_x86inc";

add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";

add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";

add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";

add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";

add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";

# 8x8 intra predictors: one add_proto / specialize pair per function. A
# trailing "$..._x86inc" argument is '' unless CONFIG_USE_X86INC is "yes".
add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_8x8 neon/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";

add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_8x8/;

add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_8x8/;

add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_8x8 neon msa/, "$sse_x86inc";

add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";

add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";

add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";

add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";

# 16x16 intra predictors: one add_proto / specialize pair per function. A
# trailing "$..._x86inc" argument is '' unless CONFIG_USE_X86INC is "yes".
add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";

add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_16x16/;

add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_16x16/;

add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_16x16 neon msa/, "$sse2_x86inc";

add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_16x16 neon msa/, "$sse2_x86inc";

add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";

add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";

add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";

add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";

# 32x32 intra predictors: one add_proto / specialize pair per function. A
# trailing "$..._x86inc" argument is '' unless CONFIG_USE_X86INC is "yes".
add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_32x32 neon msa/, "$ssse3_x86inc";

add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_32x32/;

add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_32x32/;

add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_32x32 neon msa/, "$sse2_x86inc";

# Unlike the other 32x32 entries this one uses the x86_64-only x86inc
# variable, which is '' unless $opts{arch} is "x86_64".
add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";

add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_32x32 msa neon/, "$sse2_x86inc";

add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";

add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";

add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";

#
# post proc
#
# These entries are only registered when CONFIG_VP9_POSTPROC is "yes".
# Where the sse2 variant's symbol does not follow the <function>_sse2 naming,
# the $<function>_sse2 variable is assigned the symbol name to use instead
# (the _xmm / _wmt names below). The names are quoted strings rather than
# barewords so the assignments keep working under "use strict".
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
  add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
  specialize qw/vp9_mbpost_proc_down sse2/;
  $vp9_mbpost_proc_down_sse2 = 'vp9_mbpost_proc_down_xmm';

  add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
  specialize qw/vp9_mbpost_proc_across_ip sse2/;
  $vp9_mbpost_proc_across_ip_sse2 = 'vp9_mbpost_proc_across_ip_xmm';

  add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
  specialize qw/vp9_post_proc_down_and_across sse2/;
  $vp9_post_proc_down_and_across_sse2 = 'vp9_post_proc_down_and_across_xmm';

  add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
  specialize qw/vp9_plane_add_noise sse2/;
  $vp9_plane_add_noise_sse2 = 'vp9_plane_add_noise_wmt';

  add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
  specialize qw/vp9_filter_by_weight16x16 sse2 msa/;

  add_proto qw/void vp9_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
  specialize qw/vp9_filter_by_weight8x8 sse2 msa/;
}

#
# Sub Pixel Filters
#
# All convolve entries share one argument list. "$sse2_x86inc" is '' unless
# CONFIG_USE_X86INC is "yes"; "$avx2_ssse3" is '' unless both HAVE_AVX2 and
# HAVE_SSSE3 are "yes".
add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve_copy neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve_avg neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/;

#
# dct
#
# Inverse transform "..._add" entries (idct / iht / iwht). The same set of
# functions is registered in each of three configurations:
#   - CONFIG_VP9_HIGHBITDEPTH "yes": specialize is called with the name only;
#   - CONFIG_EMULATE_HARDWARE "yes": specialize is called with the name only;
#   - otherwise: the listed variants are attached.
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # Note: as optimized versions of these functions are added, a check is
  # needed to ensure that when CONFIG_EMULATE_HARDWARE is on, they default to
  # the C versions only.
  add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct4x4_1_add/;

  add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct4x4_16_add/;

  add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct8x8_1_add/;

  add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct8x8_64_add/;

  add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct8x8_12_add/;

  add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct16x16_1_add/;

  add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct16x16_256_add/;

  add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct16x16_10_add/;

  add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct32x32_1024_add/;

  add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct32x32_34_add/;

  add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_idct32x32_1_add/;

  add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
  specialize qw/vp9_iht4x4_16_add/;

  add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
  specialize qw/vp9_iht8x8_64_add/;

  add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
  specialize qw/vp9_iht16x16_256_add/;

  # dct and add

  add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_iwht4x4_1_add/;

  add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vp9_iwht4x4_16_add/;

} else {
  # Force C versions if CONFIG_EMULATE_HARDWARE is enabled ("yes").
  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
    add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct4x4_1_add/;

    add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct4x4_16_add/;

    add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_1_add/;

    add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_64_add/;

    add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_12_add/;

    add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_1_add/;

    add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_256_add/;

    add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_10_add/;

    add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_1024_add/;

    add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_34_add/;

    add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_1_add/;

    add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
    specialize qw/vp9_iht4x4_16_add/;

    add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
    specialize qw/vp9_iht8x8_64_add/;

    add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
    specialize qw/vp9_iht16x16_256_add/;

    # dct and add

    add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_iwht4x4_1_add/;

    add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_iwht4x4_16_add/;
  } else {
    add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct4x4_1_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct4x4_16_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";

    add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";

    add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_256_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct16x16_10_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_1024_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2 msa/;
    # NOTE(review): the neon_asm variant of the 34-coefficient function is
    # pointed at the full 1024-coefficient NEON symbol. The original author
    # flagged this as a possible typo -- confirm that it is intentional.
    $vp9_idct32x32_34_add_neon_asm = 'vp9_idct32x32_1024_add_neon';

    add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
    specialize qw/vp9_iht4x4_16_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
    specialize qw/vp9_iht8x8_64_add sse2 neon dspr2 msa/;

    add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
    specialize qw/vp9_iht16x16_256_add sse2 dspr2 msa/;

    # dct and add

    add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_iwht4x4_1_add msa/;

    add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vp9_iwht4x4_16_add msa/, "$sse2_x86inc";
  }
}

439
440
# High bitdepth functions
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
441
442
443
  #
  # Intra prediction
  #
444
445
  add_proto qw/void vp9_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d207_predictor_4x4/;
446

447
448
  add_proto qw/void vp9_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d45_predictor_4x4/;
449

450
451
  add_proto qw/void vp9_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d63_predictor_4x4/;
452

453
454
  add_proto qw/void vp9_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_h_predictor_4x4/;
455

456
457
  add_proto qw/void vp9_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d117_predictor_4x4/;
458

459
460
  add_proto qw/void vp9_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d135_predictor_4x4/;
461

462
463
  add_proto qw/void vp9_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d153_predictor_4x4/;
464

465
  add_proto qw/void vp9_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
James Zern's avatar
James Zern committed
466
  specialize qw/vp9_highbd_v_predictor_4x4/, "$sse_x86inc";
467

468
469
  add_proto qw/void vp9_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_tm_predictor_4x4/, "$sse_x86inc";
470

471
472
  add_proto qw/void vp9_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_predictor_4x4/, "$sse_x86inc";
473

474
475
  add_proto qw/void vp9_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_top_predictor_4x4/;
476

477
478
  add_proto qw/void vp9_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_left_predictor_4x4/;
479

480
481
  add_proto qw/void vp9_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_128_predictor_4x4/;
482

483
484
  add_proto qw/void vp9_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d207_predictor_8x8/;
485

486
487
  add_proto qw/void vp9_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d45_predictor_8x8/;
488

489
490
  add_proto qw/void vp9_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d63_predictor_8x8/;
491

492
493
  add_proto qw/void vp9_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_h_predictor_8x8/;
494

495
496
  add_proto qw/void vp9_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d117_predictor_8x8/;
497

498
499
  add_proto qw/void vp9_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d135_predictor_8x8/;
500

501
502
  add_proto qw/void vp9_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d153_predictor_8x8/;
503

504
505
  add_proto qw/void vp9_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_v_predictor_8x8/, "$sse2_x86inc";
506

507
508
  add_proto qw/void vp9_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_tm_predictor_8x8/, "$sse2_x86inc";
509

510
511
  # add_proto receives the return type, the function name and the C parameter
  # list. $sse2_x86inc is 'sse2' only when CONFIG_USE_X86INC is "yes" and is
  # the empty string otherwise (see the top of this file).
  add_proto qw/void vp9_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_predictor_8x8/, "$sse2_x86inc";
512

513
514
  add_proto qw/void vp9_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_top_predictor_8x8/;
515

516
517
  add_proto qw/void vp9_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_left_predictor_8x8/;
518

519
520
  add_proto qw/void vp9_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_128_predictor_8x8/;
521

522
523
  add_proto qw/void vp9_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d207_predictor_16x16/;
524

525
526
  add_proto qw/void vp9_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d45_predictor_16x16/;
527

528
529
  add_proto qw/void vp9_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d63_predictor_16x16/;
530

531
532
  add_proto qw/void vp9_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_h_predictor_16x16/;
533

534
535
  add_proto qw/void vp9_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d117_predictor_16x16/;
536

537
538
  add_proto qw/void vp9_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d135_predictor_16x16/;
539

540
541
  add_proto qw/void vp9_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d153_predictor_16x16/;
542

543
  add_proto qw/void vp9_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
James Zern's avatar
James Zern committed
544
  specialize qw/vp9_highbd_v_predictor_16x16/, "$sse2_x86inc";
545

546
  add_proto qw/void vp9_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
547
  specialize qw/vp9_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc";
548

549
550
  add_proto qw/void vp9_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_predictor_16x16/, "$sse2_x86inc";
551

552
553
  add_proto qw/void vp9_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_top_predictor_16x16/;
554

555
556
  add_proto qw/void vp9_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_left_predictor_16x16/;
557

558
559
  add_proto qw/void vp9_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_128_predictor_16x16/;
560

561
562
  add_proto qw/void vp9_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d207_predictor_32x32/;
563

564
565
  add_proto qw/void vp9_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d45_predictor_32x32/;
566

567
568
  add_proto qw/void vp9_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d63_predictor_32x32/;
569

570
571
  add_proto qw/void vp9_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_h_predictor_32x32/;
572

573
574
  add_proto qw/void vp9_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d117_predictor_32x32/;
575

576
577
  add_proto qw/void vp9_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d135_predictor_32x32/;
578

579
580
  add_proto qw/void vp9_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_d153_predictor_32x32/;
581

582
583
  add_proto qw/void vp9_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_v_predictor_32x32/, "$sse2_x86inc";
584

585
  add_proto qw/void vp9_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
586
  specialize qw/vp9_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
587

588
  add_proto qw/void vp9_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
589
  specialize qw/vp9_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc";
590

591
592
  add_proto qw/void vp9_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_top_predictor_32x32/;
593

594
595
  add_proto qw/void vp9_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_left_predictor_32x32/;
596

597
598
  add_proto qw/void vp9_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vp9_highbd_dc_128_predictor_32x32/;
599

600
601
602
  #
  # Sub Pixel Filters
  #
603
604
  add_proto qw/void vp9_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve_copy/;
605

606
607
  add_proto qw/void vp9_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve_avg/;
608

609
610
  add_proto qw/void vp9_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8/, "$sse2_x86_64";
611

612
613
  add_proto qw/void vp9_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_horiz/, "$sse2_x86_64";
614

615
616
  add_proto qw/void vp9_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_vert/, "$sse2_x86_64";
617

618
619
  add_proto qw/void vp9_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_avg/, "$sse2_x86_64";
620

621
622
  add_proto qw/void vp9_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
623

624
625
  add_proto qw/void vp9_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vp9_highbd_convolve8_avg_vert/, "$sse2_x86_64";
626

627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
  #
  # post proc
  #
  if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
    # vp9_highbd_* post-processing entry points, declared only when
    # CONFIG_VP9_POSTPROC is "yes". Each entry is [name, C parameter list];
    # every function returns void and none names a SIMD flavour.
    my @highbd_postproc_protos = (
      ['vp9_highbd_mbpost_proc_down',
       "uint16_t *dst, int pitch, int rows, int cols, int flimit"],
      ['vp9_highbd_mbpost_proc_across_ip',
       "uint16_t *src, int pitch, int rows, int cols, int flimit"],
      ['vp9_highbd_post_proc_down_and_across',
       "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit"],
      ['vp9_highbd_plane_add_noise',
       "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch"],
    );

    foreach my $proto (@highbd_postproc_protos) {
      # Same argument order as the qw// form: return type, name, parameters.
      add_proto('void', @{$proto});
      specialize($proto->[0]);
    }
  }

644
645
646
  #
  # dct
  #
647
648
  # Note: as optimized versions of these functions are added, we need to add a check to ensure
  # that when CONFIG_EMULATE_HARDWARE is on, only the C versions are used.
649
650
  add_proto qw/void vp9_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct4x4_1_add/;
651

652
653
  add_proto qw/void vp9_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct8x8_1_add/;
654

655
656
  add_proto qw/void vp9_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct16x16_1_add/;
657

658
659
  add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct32x32_1024_add/;
660

661
662
  add_proto qw/void vp9_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct32x32_34_add/;
663

664
665
  add_proto qw/void vp9_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_idct32x32_1_add/;
666

667
668
  add_proto qw/void vp9_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
  specialize qw/vp9_highbd_iht4x4_16_add/;
669

670
671
  add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
  specialize qw/vp9_highbd_iht8x8_64_add/;
672

673
674
  add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
  specialize qw/vp9_highbd_iht16x16_256_add/;
James Zern's avatar
James Zern committed
675

676
  # dct and add
James Zern's avatar
James Zern committed
677

678
679
  add_proto qw/void vp9_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_iwht4x4_1_add/;
James Zern's avatar
James Zern committed
680

681
682
  add_proto qw/void vp9_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vp9_highbd_iwht4x4_16_add/;
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718

  # Force C versions if CONFIG_EMULATE_HARDWARE is "yes": in that case no SIMD
  # flavour is handed to specialize; otherwise each function below gets sse2.
  my @highbd_idct_simd =
    vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes" ? () : ('sse2');

  # All five functions share one signature, so it is spelled out once.
  foreach my $idct_fn (qw/vp9_highbd_idct4x4_16_add
                          vp9_highbd_idct8x8_64_add
                          vp9_highbd_idct8x8_10_add
                          vp9_highbd_idct16x16_256_add
                          vp9_highbd_idct16x16_10_add/) {
    add_proto('void', $idct_fn, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd");
    specialize($idct_fn, @highbd_idct_simd);
  }
719
}
James Zern's avatar
James Zern committed
720
721
722
723
724
725

#
# Encoder functions below this point.
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {

726
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
727
specialize qw/vp9_avg_8x8 sse2 neon msa/;
728

729
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
730
specialize qw/vp9_avg_4x4 sse2 msa/;
731

732
733
734
add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_minmax_8x8 sse2/;

735
add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
Johann's avatar
Johann committed
736
specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
737

738
739
add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vp9_hadamard_16x16 sse2/;
740
741
742
743

add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
specialize qw/vp9_satd sse2/;

744
add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
Frank Galligan's avatar
Frank Galligan committed
745
specialize qw/vp9_int_pro_row sse2 neon/;
746
747

add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width";
Frank Galligan's avatar
Frank Galligan committed
748
specialize qw/vp9_int_pro_col sse2 neon/;
749

750
751
add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
specialize qw/vp9_vector_var sse2/;
752

753
754
755
# vp9_highbd_* counterparts of the averaging / min-max helpers declared above.
# They are only declared when CONFIG_VP9_HIGHBITDEPTH is "yes", and no SIMD
# flavour is listed for any of them.
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
  specialize qw/vp9_highbd_avg_8x8/;

  add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
  specialize qw/vp9_highbd_avg_4x4/;

  add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
  specialize qw/vp9_highbd_minmax_8x8/;
}

James Zern's avatar
James Zern committed
762
763
# ENCODEMB INVOKE

764
765
766
767
768
769
770
771
#
# Denoiser
#
if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
  # The denoiser filter is declared only when temporal denoising is
  # configured; it lists a single SIMD flavour, sse2.
  my $denoiser_fn = 'vp9_denoiser_filter';
  add_proto('int', $denoiser_fn, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude");
  specialize($denoiser_fn, 'sse2');
}

772
773
774
775
776
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# The transform coefficients are held in 32-bit values, so the assembler code
# for vp9_block_error can no longer be used.
  add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
  specialize qw/vp9_block_error/;
Jingning Han's avatar
Jingning Han committed
777

778
  add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
779
  specialize qw/vp9_quantize_fp/;
780

781
  add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
782
  specialize qw/vp9_quantize_fp_32x32/;
James Zern's avatar
James Zern committed
783

784
  add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
785
  specialize qw/vp9_fdct8x8_quant/;
786
787
} else {
  add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
788
  specialize qw/vp9_block_error avx2 msa/, "$sse2_x86inc";
789

790
  add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
James Zern's avatar
James Zern committed
791
  specialize qw/vp9_block_error_fp neon/, "$sse2_x86inc";
792

793
  add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
Johann's avatar
Johann committed
794
  specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";
795

796
  add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
Johann's avatar
Johann committed
797
  specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";
798

799
  add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
800
  specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
801
}
James Zern's avatar
James Zern committed
802
803
804
805
806
807
808
809
810
811
812
813
814
815

#
# Structured Similarity (SSIM)
#
if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
  # The 8x8 and 16x16 variants take the same parameter list, so it is written
  # once. $sse2_x86_64 is 'sse2' only when the target arch is x86_64 and is
  # the empty string otherwise (see the top of this file).
  my $ssim_params = "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";

  foreach my $ssim_fn (qw/vp9_ssim_parms_8x8 vp9_ssim_parms_16x16/) {
    add_proto('void', $ssim_fn, $ssim_params);
    specialize($ssim_fn, "$sse2_x86_64");
  }
}

# fdct functions

816
817
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
818
  specialize qw/vp9_fht4x4 sse2/;
819
820

  add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
821
  specialize qw/vp9_fht8x8 sse2/;
822
823

  add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
824
  specialize qw/vp9_fht16x16 sse2/;
825
826

  add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
827
  specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
828
829

  add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
830
  specialize qw/vp9_fdct4x4_1 sse2/;
James Zern's avatar
James Zern committed
831

832
  add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
833
  specialize qw/vp9_fdct8x8_1 sse2/;
James Zern's avatar
James Zern committed
834

835
  add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
836
  specialize qw/vp9_fdct16x16_1 sse2/;
James Zern's avatar
James Zern committed
837

838
  add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
839
  specialize qw/vp9_fdct32x32_1 sse2/;
James Zern's avatar
James Zern committed
840

841
  add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
842
  specialize qw/vp9_fdct32x32 sse2/;
843

844
  add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
845
  specialize qw/vp9_fdct32x32_rd sse2/;
846
847
} else {
  add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
848
  specialize qw/vp9_fht4x4 sse2 msa/;
849
850

  add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
851
  specialize qw/vp9_fht8x8 sse2 msa/;
852
853

  add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
854
  specialize qw/vp9_fht16x16 sse2 msa/;
855
856

  add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
857
  specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc";
858
859
860
861
862

  add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vp9_fdct4x4_1 sse2/;

  add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
863
  specialize qw/vp9_fdct8x8_1 sse2 neon msa/;
864

865
  add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
866
  specialize qw/vp9_fdct16x16_1 sse2 msa/;
867
868

  add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
869
  specialize qw/vp9_fdct32x32_1 sse2 msa/;
870
871

  add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
872
  specialize qw/vp9_fdct32x32 sse2 avx2 msa/;
873
874

  add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
875
  specialize qw/vp9_fdct32x32_rd sse2 avx2 msa/;
876
}
James Zern's avatar
James Zern committed
877
878
879
880

#
# Motion search
#
881
add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
James Zern's avatar
James Zern committed
882
883
884
885
specialize qw/vp9_full_search_sad sse3 sse4_1/;
# The sse3 and sse4_1 flavours are pointed at differently named functions
# (…sadx3 / …sadx8). The names are quoted so they are plain strings and do not
# depend on Perl's bareword fallback.
# NOTE(review): presumably rtcd.pl reads these variables to pick the symbol
# for each flavour; confirm against rtcd.pl.
$vp9_full_search_sad_sse3 = 'vp9_full_search_sadx3';
$vp9_full_search_sad_sse4_1 = 'vp9_full_search_sadx8';

886
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
887