# vp9_rtcd_defs.pl
# Prints the C preamble shared by the VP9 declarations: two #include lines
# followed by forward declarations of the struct/union types named in the
# prototypes below. Output goes to the currently selected file handle.
#
# The here-document body is emitted verbatim, so it must contain nothing but
# the intended C text (no stray line numbers or annotations).
sub vp9_common_forward_decls() {
  print <<EOF;
/*
 * VP9
 */

#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_enums.h"

struct macroblockd;

/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
EOF
}
# Hand the preamble printer to the generator by name.
forward_decls qw/vp9_common_forward_decls/;

# x86inc.asm cannot be used when PIC is enabled on 32 bit platforms, so the
# x86inc-based assembly targets are only named when CONFIG_USE_X86INC is "yes".
# Otherwise every flag is the empty string.
my $use_x86inc = vpx_config("CONFIG_USE_X86INC") eq "yes";
$mmx_x86inc   = $use_x86inc ? 'mmx'   : '';
$sse_x86inc   = $use_x86inc ? 'sse'   : '';
$sse2_x86inc  = $use_x86inc ? 'sse2'  : '';
$ssse3_x86inc = $use_x86inc ? 'ssse3' : '';
$avx_x86inc   = $use_x86inc ? 'avx'   : '';
$avx2_x86inc  = $use_x86inc ? 'avx2'  : '';

# Target names for functions that are 64 bit only: populated when the
# configured architecture is x86_64, empty strings otherwise.
my $arch_is_x86_64 = $opts{arch} eq "x86_64";
$mmx_x86_64   = $arch_is_x86_64 ? 'mmx'   : '';
$sse2_x86_64  = $arch_is_x86_64 ? 'sse2'  : '';
$ssse3_x86_64 = $arch_is_x86_64 ? 'ssse3' : '';
$avx_x86_64   = $arch_is_x86_64 ? 'avx'   : '';
$avx2_x86_64  = $arch_is_x86_64 ? 'avx2'  : '';

#
# RECON
#
# 4x4 predictors. All of them share one argument list; the x86 targets are
# passed through the $*_x86inc flags, which are empty strings unless
# CONFIG_USE_X86INC is "yes".
#
# NOTE(review): the $<function>_neon_asm assignments presumably tell the
# generator which symbol implements the neon_asm target -- confirm against the
# definition of specialize. The names are quoted so they are plain strings
# rather than barewords.
add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_4x4 neon_asm dspr2/, "$ssse3_x86inc";
$vp9_h_predictor_4x4_neon_asm = 'vp9_h_predictor_4x4_neon';

add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_4x4/;

add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_4x4/;

add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_4x4 neon_asm/, "$sse_x86inc";
$vp9_v_predictor_4x4_neon_asm = 'vp9_v_predictor_4x4_neon';

add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_4x4 neon_asm dspr2/, "$sse_x86inc";
$vp9_tm_predictor_4x4_neon_asm = 'vp9_tm_predictor_4x4_neon';

add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";

add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_4x4/;

add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_4x4/;

add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_4x4/;

# 8x8 predictors. Same argument list for every entry; x86 targets come from
# the $*_x86inc flags, which are empty strings unless CONFIG_USE_X86INC is
# "yes".
#
# NOTE(review): the $<function>_neon_asm assignments presumably name the symbol
# that implements the neon_asm target -- confirm against the definition of
# specialize. The names are quoted so they are plain strings, not barewords.
add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_8x8 neon_asm dspr2/, "$ssse3_x86inc";
$vp9_h_predictor_8x8_neon_asm = 'vp9_h_predictor_8x8_neon';

add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_8x8/;

add_proto qw/void vp9_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_8x8/;

add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_8x8 neon_asm/, "$sse_x86inc";
$vp9_v_predictor_8x8_neon_asm = 'vp9_v_predictor_8x8_neon';

add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_8x8 neon_asm dspr2/, "$sse2_x86inc";
$vp9_tm_predictor_8x8_neon_asm = 'vp9_tm_predictor_8x8_neon';

add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";

add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_8x8/;

add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_8x8/;

add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_8x8/;

# 16x16 predictors. Same argument list for every entry; x86 targets come from
# the $*_x86inc flags, which are empty strings unless CONFIG_USE_X86INC is
# "yes".
#
# NOTE(review): the $<function>_neon_asm assignments presumably name the symbol
# that implements the neon_asm target -- confirm against the definition of
# specialize. The names are quoted so they are plain strings, not barewords.
add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_16x16 neon_asm dspr2/, "$ssse3_x86inc";
$vp9_h_predictor_16x16_neon_asm = 'vp9_h_predictor_16x16_neon';

add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_16x16/;

add_proto qw/void vp9_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_16x16/;

add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_16x16 neon_asm/, "$sse2_x86inc";
$vp9_v_predictor_16x16_neon_asm = 'vp9_v_predictor_16x16_neon';

add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_16x16 neon_asm/, "$sse2_x86inc";
$vp9_tm_predictor_16x16_neon_asm = 'vp9_tm_predictor_16x16_neon';

add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";

add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_16x16/;

add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_16x16/;

add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_16x16/;

# 32x32 predictors. Same argument list for every entry. Most x86 targets come
# from the $*_x86inc flags; vp9_tm_predictor_32x32 instead uses $sse2_x86_64,
# which is only non-empty when the architecture is x86_64.
#
# NOTE(review): the $<function>_neon_asm assignments presumably name the symbol
# that implements the neon_asm target -- confirm against the definition of
# specialize. The names are quoted so they are plain strings, not barewords.
add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d45_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_h_predictor_32x32 neon_asm/, "$ssse3_x86inc";
$vp9_h_predictor_32x32_neon_asm = 'vp9_h_predictor_32x32_neon';

add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_32x32/;

add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d135_predictor_32x32/;

add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_32x32/;

add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_v_predictor_32x32 neon_asm/, "$sse2_x86inc";
$vp9_v_predictor_32x32_neon_asm = 'vp9_v_predictor_32x32_neon';

add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_tm_predictor_32x32 neon_asm/, "$sse2_x86_64";
$vp9_tm_predictor_32x32_neon_asm = 'vp9_tm_predictor_32x32_neon';

add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";

add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_top_predictor_32x32/;

add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_left_predictor_32x32/;

add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_dc_128_predictor_32x32/;

#
# Loopfilter
#
# Loop filter entry points. Three argument shapes are used below:
#   - blimit/limit/thresh only (the 16-wide vertical filters),
#   - blimit/limit/thresh plus "int count",
#   - the *_dual variants, which take two blimit/limit/thresh triples.
#
# NOTE(review): the $<function>_neon_asm assignments presumably name the symbol
# that implements the neon_asm target -- confirm against the definition of
# specialize. The names are quoted so they are plain strings, not barewords.
add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2/;
$vp9_lpf_vertical_16_neon_asm = 'vp9_lpf_vertical_16_neon';

add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/;
$vp9_lpf_vertical_16_dual_neon_asm = 'vp9_lpf_vertical_16_dual_neon';

add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/;
$vp9_lpf_vertical_8_neon_asm = 'vp9_lpf_vertical_8_neon';

add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/;
$vp9_lpf_vertical_8_dual_neon_asm = 'vp9_lpf_vertical_8_dual_neon';

add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_vertical_4 mmx neon_asm dspr2/;
$vp9_lpf_vertical_4_neon_asm = 'vp9_lpf_vertical_4_neon';

add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_vertical_4_dual sse2 neon_asm dspr2/;
$vp9_lpf_vertical_4_dual_neon_asm = 'vp9_lpf_vertical_4_dual_neon';

add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/;
$vp9_lpf_horizontal_16_neon_asm = 'vp9_lpf_horizontal_16_neon';

add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/;
$vp9_lpf_horizontal_8_neon_asm = 'vp9_lpf_horizontal_8_neon';

add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/;
$vp9_lpf_horizontal_8_dual_neon_asm = 'vp9_lpf_horizontal_8_dual_neon';

add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
specialize qw/vp9_lpf_horizontal_4 mmx neon_asm dspr2/;
$vp9_lpf_horizontal_4_neon_asm = 'vp9_lpf_horizontal_4_neon';

add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vp9_lpf_horizontal_4_dual sse2 neon_asm dspr2/;
$vp9_lpf_horizontal_4_dual_neon_asm = 'vp9_lpf_horizontal_4_dual_neon';

#
# post proc
#
# The four post-processing functions are only declared when
# CONFIG_VP9_POSTPROC is "yes". For each one the sse2 target is pointed at a
# differently named symbol (*_xmm or *_wmt).
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
  add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
  specialize qw/vp9_mbpost_proc_down mmx sse2/;
  $vp9_mbpost_proc_down_sse2 = 'vp9_mbpost_proc_down_xmm';

  add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
  specialize qw/vp9_mbpost_proc_across_ip sse2/;
  $vp9_mbpost_proc_across_ip_sse2 = 'vp9_mbpost_proc_across_ip_xmm';

  add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
  specialize qw/vp9_post_proc_down_and_across mmx sse2/;
  $vp9_post_proc_down_and_across_sse2 = 'vp9_post_proc_down_and_across_xmm';

  add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
  specialize qw/vp9_plane_add_noise mmx sse2/;
  $vp9_plane_add_noise_sse2 = 'vp9_plane_add_noise_wmt';
}

# The blend helpers are declared unconditionally and list no optimized targets.
add_proto qw/void vp9_blend_mb_inner/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
specialize qw/vp9_blend_mb_inner/;

add_proto qw/void vp9_blend_mb_outer/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
specialize qw/vp9_blend_mb_outer/;

add_proto qw/void vp9_blend_b/, "uint8_t *y, uint8_t *u, uint8_t *v, int y1, int u1, int v1, int alpha, int stride";
specialize qw/vp9_blend_b/;

#
# Sub Pixel Filters
#
# Convolution entry points; every function shares the same ten-argument list.
# copy/avg take their x86 target from $sse2_x86inc, while the convolve8 family
# lists sse2 and ssse3 directly.
#
# NOTE(review): the $<function>_neon_asm assignments presumably name the symbol
# that implements the neon_asm target -- confirm against the definition of
# specialize. The names are quoted so they are plain strings, not barewords.
add_proto qw/void vp9_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve_copy neon_asm dspr2/, "$sse2_x86inc";
$vp9_convolve_copy_neon_asm = 'vp9_convolve_copy_neon';

add_proto qw/void vp9_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
$vp9_convolve_avg_neon_asm = 'vp9_convolve_avg_neon';

add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_neon_asm = 'vp9_convolve8_neon';

add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_horiz_neon_asm = 'vp9_convolve8_horiz_neon';

add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_vert_neon_asm = 'vp9_convolve8_vert_neon';

add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_avg_neon_asm = 'vp9_convolve8_avg_neon';

add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_avg_horiz_neon_asm = 'vp9_convolve8_avg_horiz_neon';

add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon_asm dspr2/;
$vp9_convolve8_avg_vert_neon_asm = 'vp9_convolve8_avg_vert_neon';

#
# dct
#
# Inverse transform entry points. The idct/iwht functions take
# (input, dest, dest_stride); the iht functions add a trailing tx_type.
# The two larger 8x8 idct variants additionally list $ssse3_x86_64, which is
# only non-empty when the architecture is x86_64.
#
# NOTE(review): the $<function>_neon_asm assignments presumably name the symbol
# that implements the neon_asm target -- confirm against the definition of
# specialize. The names are quoted so they are plain strings, not barewords.
add_proto qw/void vp9_idct4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
$vp9_idct4x4_1_add_neon_asm = 'vp9_idct4x4_1_add_neon';

add_proto qw/void vp9_idct4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
$vp9_idct4x4_16_add_neon_asm = 'vp9_idct4x4_16_add_neon';

add_proto qw/void vp9_idct8x8_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
$vp9_idct8x8_1_add_neon_asm = 'vp9_idct8x8_1_add_neon';

add_proto qw/void vp9_idct8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
$vp9_idct8x8_64_add_neon_asm = 'vp9_idct8x8_64_add_neon';

add_proto qw/void vp9_idct8x8_12_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
$vp9_idct8x8_12_add_neon_asm = 'vp9_idct8x8_12_add_neon';

add_proto qw/void vp9_idct16x16_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
$vp9_idct16x16_1_add_neon_asm = 'vp9_idct16x16_1_add_neon';

add_proto qw/void vp9_idct16x16_256_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_256_add_neon_asm = 'vp9_idct16x16_256_add_neon';

add_proto qw/void vp9_idct16x16_10_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_10_add_neon_asm = 'vp9_idct16x16_10_add_neon';

add_proto qw/void vp9_idct32x32_1024_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
$vp9_idct32x32_1024_add_neon_asm = 'vp9_idct32x32_1024_add_neon';

add_proto qw/void vp9_idct32x32_34_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
# NOTE(review): unlike its siblings, the 34 variant is mapped to the
# 1024 NEON symbol rather than a same-named one. Kept as written; presumably a
# deliberate reuse -- confirm it is not a typo.
$vp9_idct32x32_34_add_neon_asm = 'vp9_idct32x32_1024_add_neon';

add_proto qw/void vp9_idct32x32_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
$vp9_idct32x32_1_add_neon_asm = 'vp9_idct32x32_1_add_neon';

add_proto qw/void vp9_iht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
$vp9_iht4x4_16_add_neon_asm = 'vp9_iht4x4_16_add_neon';

add_proto qw/void vp9_iht8x8_64_add/, "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
$vp9_iht8x8_64_add_neon_asm = 'vp9_iht8x8_64_add_neon';

add_proto qw/void vp9_iht16x16_256_add/, "const int16_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp9_iht16x16_256_add sse2 dspr2/;

# dct and add

add_proto qw/void vp9_iwht4x4_1_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_1_add/;

add_proto qw/void vp9_iwht4x4_16_add/, "const int16_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_16_add/;

#
# Encoder functions below this point.
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {


# variance
add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
James Zern's avatar
James Zern committed
405
specialize qw/vp9_variance32x16 avx2/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
406
407
408
409
410

add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x32/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
James Zern's avatar
James Zern committed
411
specialize qw/vp9_variance64x32 avx2/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
412
413
414
415
416

add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance32x64/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
James Zern's avatar
James Zern committed
417
specialize qw/vp9_variance32x32 avx2/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
418
419

add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
James Zern's avatar
James Zern committed
420
specialize qw/vp9_variance64x64 avx2/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
421
422

add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
James Zern's avatar
James Zern committed
423
specialize qw/vp9_variance16x16 mmx avx2/, "$sse2_x86inc";
James Zern's avatar
James Zern committed
424
425
426
427
428
429
430
431
432
433

add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x16 mmx/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x8 mmx/, "$sse2_x86inc";

434
435
436
437
438
add_proto qw/void vp9_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
specialize qw/vp9_get8x8var mmx/, "$sse2_x86inc";

add_proto qw/void vp9_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
specialize qw/vp9_get16x16var avx2/, "$sse2_x86inc";
439

James Zern's avatar
James Zern committed
440
441
442
443
444
445
446
447
448
449
add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance8x4/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance4x8/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance4x4 mmx/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
450
specialize qw/vp9_sub_pixel_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
James Zern's avatar
James Zern committed
451
452

add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
453
specialize qw/vp9_sub_pixel_avg_variance64x64/, "$sse2_x86inc", "$ssse3_x86inc";
James Zern's avatar
James Zern committed
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479

add_proto qw/unsigned int vp9_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";

# Sub-pixel variance entries. Each block size is registered in two forms: a
# plain one and an "_avg_" one whose prototype takes an extra second_pred
# argument. The *_x86inc names passed to specialize are empty strings unless
# CONFIG_USE_X86INC is "yes" (they are assigned near the top of this file).
add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x64/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance64x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance16x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance32x32/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance16x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance8x16/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance16x8/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance8x8/, "$sse2_x86inc", "$ssse3_x86inc";

# TODO(jingning): need to convert 8x4/4x8 functions into mmx/sse form
add_proto qw/unsigned int vp9_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance8x4/, "$sse2_x86inc", "$ssse3_x86inc";

# The 4-wide sizes below use $sse_x86inc where the larger sizes use
# $sse2_x86inc.
add_proto qw/unsigned int vp9_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance4x8/, "$sse_x86inc", "$ssse3_x86inc";

add_proto qw/unsigned int vp9_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
#vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt

add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";

# Single-reference SAD entries, one per block size. Each takes a source
# pointer/stride and a reference pointer/stride and returns unsigned int.
# $sse2_x86inc / $sse_x86inc are empty strings unless CONFIG_USE_X86INC is
# "yes"; "mmx" is listed unconditionally for the sizes that name it.
add_proto qw/unsigned int vp9_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
specialize qw/vp9_sad64x64/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad32x64/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad64x32/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad32x16/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad16x32/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
specialize qw/vp9_sad32x32/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
specialize qw/vp9_sad16x16 mmx/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
specialize qw/vp9_sad16x8 mmx/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
specialize qw/vp9_sad8x16 mmx/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
specialize qw/vp9_sad8x8 mmx/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad8x4/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vp9_sad4x8/, "$sse_x86inc";

add_proto qw/unsigned int vp9_sad4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride";
specialize qw/vp9_sad4x4 mmx/, "$sse_x86inc";

# "_avg" SAD entries: same parameter list as the plain vp9_sadNxM entries
# plus a trailing second_pred pointer. $sse2_x86inc / $sse_x86inc are empty
# strings unless CONFIG_USE_X86INC is "yes".
add_proto qw/unsigned int vp9_sad64x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad64x64_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad32x64_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad32x64_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad64x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad64x32_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad32x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad32x16_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x32_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad32x32_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad32x32_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x16_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad16x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad16x8_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x16_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad8x16_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad8x8_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad8x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad8x4_avg/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_sad4x8_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad4x8_avg/, "$sse_x86inc";

add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred";
specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";

# vp9_sad<W>x<H>x3 entries. Every size shares one prototype (results are
# written through sad_array), so the list is driven from a table: each row is
# a block size followed by the specializations registered for it, if any.
{
  my $x3_args = "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
  my @x3_table = (
    [qw/64x64/],
    [qw/32x32/],
    [qw/16x16 sse3 ssse3/],
    [qw/16x8 sse3 ssse3/],
    [qw/8x16 sse3/],
    [qw/8x8 sse3/],
    [qw/4x4 sse3/],
  );

  foreach my $row (@x3_table) {
    my ($dims, @isa) = @$row;
    my $fn = "vp9_sad${dims}x3";
    add_proto qw/void/, $fn, $x3_args;
    specialize $fn, @isa;
  }
}

# vp9_sad<W>x<H>x8 entries, table-driven. Each row is: block size, the
# argument string to register, then any specializations.
{
  # The same parameter list is spelled with two different spacings in this
  # file; both spellings are kept so the text handed to add_proto for each
  # function is unchanged.
  my $args_wide  = "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int  ref_stride, uint32_t *sad_array";
  my $args_tight = "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array";

  my @x8_table = (
    ['64x64', $args_wide],
    ['32x32', $args_wide],
    ['16x16', $args_wide, 'sse4'],
    ['16x8',  $args_wide, 'sse4'],
    ['8x16',  $args_wide, 'sse4'],
    ['8x8',   $args_wide, 'sse4'],
    ['8x4',   $args_tight],
    ['4x8',   $args_tight],
    ['4x4',   $args_wide, 'sse4'],
  );

  foreach my $row (@x8_table) {
    my ($dims, $args, @isa) = @$row;
    my $fn = "vp9_sad${dims}x8";
    add_proto qw/void/, $fn, $args;
    specialize $fn, @isa;
  }
}

# vp9_sad<W>x<H>x4d entries. Unlike the x3/x8 forms, ref_ptr here is an array
# of reference pointers ("const uint8_t* const ref_ptr[]"); results are
# written through sad_array.
add_proto qw/void vp9_sad64x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad64x64x4d sse2/;

add_proto qw/void vp9_sad32x64x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x64x4d sse2/;

add_proto qw/void vp9_sad64x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad64x32x4d sse2/;

add_proto qw/void vp9_sad32x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x16x4d sse2/;

add_proto qw/void vp9_sad16x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x32x4d sse2/;

add_proto qw/void vp9_sad32x32x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad32x32x4d sse2/;

add_proto qw/void vp9_sad16x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x16x4d sse2/;

add_proto qw/void vp9_sad16x8x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad16x8x4d sse2/;

add_proto qw/void vp9_sad8x16x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad8x16x4d sse2/;

add_proto qw/void vp9_sad8x8x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad8x8x4d sse2/;

# TODO(jingning): need to convert these 4x8/8x4 functions into sse2 form
add_proto qw/void vp9_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad8x4x4d sse2/;

add_proto qw/void vp9_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad4x8x4d sse/;

add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
specialize qw/vp9_sad4x4x4d sse/;

# MSE entries. Only the 16x16 size lists specializations; the other sizes are
# registered with an empty specialization list.
add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
specialize qw/vp9_mse16x16 mmx avx2/, "$sse2_x86inc";

add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
specialize qw/vp9_mse8x16/;

add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
specialize qw/vp9_mse16x8/;

add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
specialize qw/vp9_mse8x8/;

# The single parameter is declared without a name in the prototype string.
add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
specialize qw/vp9_get_mb_ss mmx sse2/;
# ENCODEMB INVOKE

add_proto qw/int64_t vp9_block_error/, "const int16_t *coeff, const int16_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp9_block_error avx2/, "$sse2_x86inc";

add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/vp9_subtract_block/, "$sse2_x86inc";

# $ssse3_x86_64 is 'ssse3' only when $opts{arch} is "x86_64" and '' otherwise
# (assigned near the top of this file), so both quantizers get their ssse3
# specialization on 64-bit x86 builds only.
add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b/, "$ssse3_x86_64";

add_proto qw/void vp9_quantize_b_32x32/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64";

#
# Structured Similarity (SSIM)
#
if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
    # The 8x8 and 16x16 helpers are registered with the same prototype and the
    # same specialization. $sse2_x86_64 is 'sse2' only when $opts{arch} is
    # "x86_64" and '' otherwise.
    my $ssim_args = "uint8_t *s, int sp, uint8_t *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr";

    foreach my $dims (qw/8x8 16x16/) {
        my $fn = "vp9_ssim_parms_$dims";
        add_proto qw/void/, $fn, $ssim_args;
        specialize $fn, "$sse2_x86_64";
    }
}

# fdct functions
# The vp9_fht* entries take an extra tx_type argument; every other entry in
# this group shares the (input, output, stride) prototype.
add_proto qw/void vp9_fht4x4/, "const int16_t *input, int16_t *output, int stride, int tx_type";
specialize qw/vp9_fht4x4 sse2 avx2/;

add_proto qw/void vp9_fht8x8/, "const int16_t *input, int16_t *output, int stride, int tx_type";
specialize qw/vp9_fht8x8 sse2 avx2/;

add_proto qw/void vp9_fht16x16/, "const int16_t *input, int16_t *output, int stride, int tx_type";
specialize qw/vp9_fht16x16 sse2/;

# $mmx_x86inc is 'mmx' only when CONFIG_USE_X86INC is "yes".
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fwht4x4/, "$mmx_x86inc";

add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct4x4_1 sse2/;

add_proto qw/void vp9_fdct4x4/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct4x4 sse2 avx2/;

add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct8x8_1 sse2/;

# $ssse3_x86_64 is 'ssse3' only when $opts{arch} is "x86_64".
add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64";

add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct16x16_1 sse2/;

add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct16x16 sse2/;

add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct32x32_1 sse2/;

add_proto qw/void vp9_fdct32x32/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct32x32 sse2 avx2/;

add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, int16_t *output, int stride";
specialize qw/vp9_fdct32x32_rd sse2 avx2/;

#
# Motion search
#
# NOTE(review): the "$<fn>_<isa> = '<name>'" assignments that follow some
# specialize calls appear to redirect that specialization to a function whose
# name does not follow the <fn>_<isa> pattern; confirm against the RTCD
# generator. The right-hand sides are quoted so they do not depend on Perl
# treating an unknown bareword as a string.
add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
specialize qw/vp9_full_search_sad sse3 sse4_1/;
$vp9_full_search_sad_sse3='vp9_full_search_sadx3';
$vp9_full_search_sad_sse4_1='vp9_full_search_sadx8';

add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_refining_search_sad sse3/;
$vp9_refining_search_sad_sse3='vp9_refining_search_sadx4';

add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad sse3/;
$vp9_diamond_search_sad_sse3='vp9_diamond_search_sadx4';

add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_full_range_search/;

add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
specialize qw/vp9_temporal_filter_apply sse2/;

}
# end encoder functions
1;