vpx_dsp_rtcd_defs.pl 124 KB
Newer Older
Johann's avatar
Johann committed
1
2
3
4
5
6
7
# Prints the C preamble for the generated DSP header to the currently selected
# output handle: a banner comment followed by the two shared includes.
# The heredoc body is emitted verbatim, so it must contain only C text.
sub vpx_dsp_forward_decls() {
print <<EOF
/*
 * DSP
 */

#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"

EOF
}
forward_decls qw/vpx_dsp_forward_decls/;

14
15
16
17
18
19
# x86inc.asm has its own constraints, so every specialization that depends on
# it goes through a dedicated variable; that keeps it easy to disable.
# All of them start out as the empty string so no else branches are needed.
($mmx_x86inc, $sse_x86inc, $sse2_x86inc, $ssse3_x86inc, $avx_x86inc,
 $avx2_x86inc) = ('') x 6;
($mmx_x86_64_x86inc, $sse_x86_64_x86inc, $sse2_x86_64_x86inc,
 $ssse3_x86_64_x86inc, $avx_x86_64_x86inc, $avx2_x86_64_x86inc) = ('') x 6;
Johann's avatar
Johann committed
20
21
22
23
24
25
26
# When the build enables x86inc, fill in the x86inc-gated specialization names.
# Otherwise they keep whatever value they already had.
if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
  $mmx_x86inc = 'mmx';
  $sse_x86inc = 'sse';
  $sse2_x86inc = 'sse2';
  $ssse3_x86inc = 'ssse3';
  $avx_x86inc = 'avx';
  $avx2_x86inc = 'avx2';
  # Variants that need x86inc and an x86_64 target at the same time.
  if ($opts{arch} eq "x86_64") {
    $mmx_x86_64_x86inc = 'mmx';
    $sse_x86_64_x86inc = 'sse';
    $sse2_x86_64_x86inc = 'sse2';
    $ssse3_x86_64_x86inc = 'ssse3';
    $avx_x86_64_x86inc = 'avx';
    $avx2_x86_64_x86inc = 'avx2';
  }
}

Zoe Liu's avatar
Zoe Liu committed
37
38
39
40
41
42
# optimizations which depend on multiple features:
# 'avx2' is offered only when the build reports both AVX2 and SSSE3,
# otherwise the variable is the empty string.
$avx2_ssse3 =
  (vpx_config("HAVE_AVX2") eq "yes" && vpx_config("HAVE_SSSE3") eq "yes")
    ? 'avx2'
    : '';

43
44
# functions that are 64 bit only get their own gate variables;
# each one starts as the empty string.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
  ('') x 5;
Johann's avatar
Johann committed
45
46
47
48
49
50
51
52
# On an x86_64 target, switch on the 64-bit-only specialization names.
if ($opts{arch} eq "x86_64") {
  ($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
    qw/mmx sse2 ssse3 avx avx2/;
}

53
54
55
56
#
# Intra prediction
#

57
58
# 4x4 intra predictors (8-bit pixels).
# Each add_proto/specialize pair registers one function. The quoted
# "$sse2_x86inc" / "$ssse3_x86inc" entries are empty strings unless
# CONFIG_USE_X86INC is "yes" (see the x86inc setup earlier in this file).
add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_4x4/;

add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";

add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_4x4/;

add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63e_predictor_4x4/;

add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63f_predictor_4x4/;

add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_he_predictor_4x4/;

add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_4x4/;

add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_4x4 neon/;

add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc";

add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_ve_predictor_4x4/;

add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse2_x86inc";

add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse2_x86inc";

add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse2_x86inc";

add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse2_x86inc";
113

114
115
# 8x8 intra predictors (8-bit pixels).
# The quoted "$sse2_x86inc" / "$ssse3_x86inc" entries are empty strings unless
# CONFIG_USE_X86INC is "yes" (see the x86inc setup earlier in this file).
add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_8x8/;

add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";

add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_8x8/;

add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63e_predictor_8x8/;

add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_8x8/;

add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_8x8/;

add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc";

add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse2_x86inc";
161

162
163
# 16x16 intra predictors (8-bit pixels).
# The quoted "$sse2_x86inc" / "$ssse3_x86inc" entries are empty strings unless
# CONFIG_USE_X86INC is "yes" (see the x86inc setup earlier in this file).
add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vpx_d207e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_16x16/;

add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";

add_proto qw/void vpx_d45e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_16x16/;

add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63e_predictor_16x16/;

add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_16x16/;

add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_16x16/;

add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc";

add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
209

210
211
# 32x32 intra predictors (8-bit pixels).
# The quoted "$sse2_x86inc" / "$ssse3_x86inc" entries are empty strings unless
# CONFIG_USE_X86INC is "yes" (see the x86inc setup earlier in this file).
add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vpx_d207e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d207e_predictor_32x32/;

add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vpx_d45e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45e_predictor_32x32/;

add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d63e_predictor_32x32/;

add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_32x32 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d117_predictor_32x32/;

add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_32x32/;

add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc";

add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86inc";

add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";

add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";

add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";

add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
257
258

# High bitdepth functions
259
260
261
# High bitdepth intra predictors, registered only when CONFIG_VP9_HIGHBITDEPTH
# is "yes". They take uint16_t pixel pointers and an extra `int bd` argument.
# Only the v, tm and dc predictors name a specialization here ("$sse2_x86inc",
# which is empty unless CONFIG_USE_X86INC is "yes").
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # 4x4
  add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d207_predictor_4x4/;

  add_proto qw/void vpx_highbd_d207e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d207e_predictor_4x4/;

  add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d45_predictor_4x4/;

  add_proto qw/void vpx_highbd_d45e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d45e_predictor_4x4/;

  add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d63_predictor_4x4/;

  add_proto qw/void vpx_highbd_d63e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d63e_predictor_4x4/;

  add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_h_predictor_4x4/;

  add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d117_predictor_4x4/;

  add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d135_predictor_4x4/;

  add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d153_predictor_4x4/;

  add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_v_predictor_4x4/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_top_predictor_4x4/;

  add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_left_predictor_4x4/;

  add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_128_predictor_4x4/;

  # 8x8
  add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d207_predictor_8x8/;

  add_proto qw/void vpx_highbd_d207e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d207e_predictor_8x8/;

  add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d45_predictor_8x8/;

  add_proto qw/void vpx_highbd_d45e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d45e_predictor_8x8/;

  add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d63_predictor_8x8/;

  add_proto qw/void vpx_highbd_d63e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d63e_predictor_8x8/;

  add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_h_predictor_8x8/;

  add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d117_predictor_8x8/;

  add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d135_predictor_8x8/;

  add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d153_predictor_8x8/;

  add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_top_predictor_8x8/;

  add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_left_predictor_8x8/;

  add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_128_predictor_8x8/;

  # 16x16
  add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d207_predictor_16x16/;

  add_proto qw/void vpx_highbd_d207e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d207e_predictor_16x16/;

  add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d45_predictor_16x16/;

  add_proto qw/void vpx_highbd_d45e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d45e_predictor_16x16/;

  add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d63_predictor_16x16/;

  add_proto qw/void vpx_highbd_d63e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d63e_predictor_16x16/;

  add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_h_predictor_16x16/;

  add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d117_predictor_16x16/;

  add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d135_predictor_16x16/;

  add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d153_predictor_16x16/;

  add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_top_predictor_16x16/;

  add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_left_predictor_16x16/;

  add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_128_predictor_16x16/;

  # 32x32
  add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d207_predictor_32x32/;

  add_proto qw/void vpx_highbd_d207e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d207e_predictor_32x32/;

  add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d45_predictor_32x32/;

  add_proto qw/void vpx_highbd_d45e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d45e_predictor_32x32/;

  add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d63_predictor_32x32/;

  add_proto qw/void vpx_highbd_d63e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d63e_predictor_32x32/;

  add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_h_predictor_32x32/;

  add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d117_predictor_32x32/;

  add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d135_predictor_32x32/;

  add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_d153_predictor_32x32/;

  add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_top_predictor_32x32/;

  add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_left_predictor_32x32/;

  add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  specialize qw/vpx_highbd_dc_128_predictor_32x32/;
}  # CONFIG_VP9_HIGHBITDEPTH
452

Zoe Liu's avatar
Zoe Liu committed
453
454
455
456
#
# Sub Pixel Filters
#
# Copy/average and 8-tap convolve registrations; all share one prototype.
# "$sse2_x86inc" is empty unless CONFIG_USE_X86INC is "yes"; "$avx2_ssse3" is
# 'avx2' only when both HAVE_AVX2 and HAVE_SSSE3 are "yes" (both set earlier
# in this file).
add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve_copy neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve_avg neon dspr2 msa/, "$sse2_x86inc";

add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";

add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/;

add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/;
Zoe Liu's avatar
Zoe Liu committed
479

480
# Scaled convolve registrations; same prototype as the vpx_convolve* entries.
# Only vpx_scaled_2d names a specialization (ssse3); the others name none.
add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_2d ssse3/;

add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_horiz/;

add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_vert/;

add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_avg_2d/;

add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_avg_horiz/;

add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_avg_vert/;

# High-bitdepth sub-pixel filter prototypes. They are registered only when
# CONFIG_VP9_HIGHBITDEPTH is "yes". Each takes the same arguments as the
# 8-bit convolve prototypes plus a trailing "int bps".
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  #
  # Sub Pixel Filters
  #
  # $sse2_x86inc is 'sse2' only when CONFIG_USE_X86INC is "yes" and is the
  # empty string otherwise, so copy/avg name no ISA variant without x86inc.
  add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve_copy/, "$sse2_x86inc";

  add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve_avg/, "$sse2_x86inc";

  # $sse2_x86_64 is 'sse2' only when the target arch is x86_64 and is the
  # empty string otherwise, so the 8-tap variants below request sse2 on
  # 64-bit x86 builds only.
  add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_horiz/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_vert/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_avg/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_avg_horiz/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_avg_vert/, "$sse2_x86_64";
}  # CONFIG_VP9_HIGHBITDEPTH

#
# Loopfilter
#
# 8-bit loop filter prototypes. The non-dual functions take one
# blimit/limit/thresh triple; the *_8_dual and *_4_dual functions take two
# (suffix 0 and 1). Note that vpx_lpf_vertical_16_dual takes a single triple.
#
# Where "neon_asm" is requested, the following $<function>_neon_asm
# assignment stores the name "<function>_neon". Presumably the RTCD
# generator reads that variable to pick the symbol for the neon_asm
# variant -- confirm in the generator. The names are written as quoted
# strings so the value does not rely on Perl treating a bareword as a string.
#
add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
$vpx_lpf_vertical_16_neon_asm = 'vpx_lpf_vertical_16_neon';

add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_vertical_16_dual_neon_asm = 'vpx_lpf_vertical_16_dual_neon';

add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;

add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_vertical_8_dual_neon_asm = 'vpx_lpf_vertical_8_dual_neon';

add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;

add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;

add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
$vpx_lpf_horizontal_edge_8_neon_asm = 'vpx_lpf_horizontal_edge_8_neon';

add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
$vpx_lpf_horizontal_edge_16_neon_asm = 'vpx_lpf_horizontal_edge_16_neon';

add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;

add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_horizontal_8_dual_neon_asm = 'vpx_lpf_horizontal_8_dual_neon';

add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;

add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_horizontal_4_dual sse2 neon dspr2 msa/;

# High-bitdepth loop filter prototypes. They are registered only when
# CONFIG_VP9_HIGHBITDEPTH is "yes". They mirror the 8-bit loop filter
# prototypes, but take "uint16_t *s" and a trailing "int bd", and every one
# requests only the sse2 variant.
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_vertical_16 sse2/;

  add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;

  add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_vertical_8 sse2/;

  add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;

  add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_vertical_4 sse2/;

  add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;

  add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/;

  add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/;

  add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;

  add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;

  add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;

  add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2/;
}  # CONFIG_VP9_HIGHBITDEPTH

#
# Encoder functions.
#

#
# Forward transform
#
# Registered only when the VP9 or VP10 encoder is enabled. All prototypes
# share the argument list (input, output, stride).
#
# The vpx_fdct* set is declared in both branches of the
# CONFIG_VP9_HIGHBITDEPTH test; only the requested ISA variants differ:
#   - high-bitdepth builds request sse2 only, and additionally declare the
#     vpx_highbd_fdct* functions (the highbd *_1 variants name no ISA);
#   - other builds also request neon, msa, avx2 and ssse3 where listed.
# NOTE(review): the narrower list in high-bitdepth builds is presumably
# tied to the definition of tran_low_t in that configuration -- confirm.
#
if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct4x4 sse2/;

  add_proto qw/void vpx_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct4x4_1 sse2/;

  add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct8x8 sse2/;

  add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct8x8_1 sse2/;

  add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct16x16 sse2/;

  add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct16x16_1 sse2/;

  add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32 sse2/;

  add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32_rd sse2/;

  add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32_1 sse2/;

  add_proto qw/void vpx_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct4x4 sse2/;

  add_proto qw/void vpx_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct8x8 sse2/;

  add_proto qw/void vpx_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct8x8_1/;

  add_proto qw/void vpx_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct16x16 sse2/;

  add_proto qw/void vpx_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct16x16_1/;

  add_proto qw/void vpx_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct32x32 sse2/;

  add_proto qw/void vpx_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct32x32_rd sse2/;

  add_proto qw/void vpx_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct32x32_1/;
} else {
  add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct4x4 sse2 msa/;

  add_proto qw/void vpx_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct4x4_1 sse2/;

  # $ssse3_x86_64_x86inc is 'ssse3' only when CONFIG_USE_X86INC is "yes" and
  # the target arch is x86_64; otherwise it is the empty string.
  add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc";

  add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct8x8_1 sse2 neon msa/;

  add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct16x16 sse2 msa/;

  add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct16x16_1 sse2 msa/;

  add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32 sse2 avx2 msa/;

  add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32_rd sse2 avx2 msa/;

  add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32_1 sse2 msa/;
}  # CONFIG_VP9_HIGHBITDEPTH
}  # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER

#
# Inverse transform
#
if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) {
702
703
704
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # Note as optimized versions of these functions are added we need to add a check to ensure
  # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
705
706
  add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_iwht4x4_1_add/;
707

708
  add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
709
  specialize qw/vpx_iwht4x4_16_add/, "$sse2_x86inc";
710

711
712
  add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct4x4_1_add/;
713

714
715
  add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct8x8_1_add/;
716

717
718
  add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct16x16_1_add/;
719

720
721
  add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct32x32_1024_add/;
722

723
724
  add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct32x32_34_add/;
725

726
727
  add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct32x32_1_add/;
728

729
730
  add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_iwht4x4_1_add/;
731

732
733
  add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_iwht4x4_16_add/;
734
735
736

  # Force C versions if CONFIG_EMULATE_HARDWARE is 1
  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
    add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct4x4_16_add/;

    add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct4x4_1_add/;

    add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_64_add/;

    add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_12_add/;

    add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_1_add/;

    add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_256_add/;

    add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_10_add/;

    add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_1_add/;

    add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_1024_add/;

764
765
766
    add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_135_add/;

767
768
769
770
771
    add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_34_add/;

    add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_1_add/;
772

773
774
    add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct4x4_16_add/;
775

776
777
    add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct8x8_64_add/;
778

779
780
    add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct8x8_10_add/;
781

782
783
    add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct16x16_256_add/;
784

785
786
    add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct16x16_10_add/;
787
  } else {
788
789
790
791
792
793
794
    add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct4x4_16_add sse2/;

    add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct4x4_1_add sse2/;

    add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
795
    specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64_x86inc";
796
797

    add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
798
    specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64_x86inc";
799
800
801
802
803
804
805
806
807
808
809
810
811
812

    add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_1_add sse2/;

    add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_256_add sse2/;

    add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_10_add sse2/;

    add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_1_add sse2/;

    add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
813
    specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64_x86inc";
814

815
    add_proto qw/void vpx_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
816
817
    specialize qw/vpx_idct32x32_135_add sse2/, "$ssse3_x86_64_x86inc";
    # Need to add 135 eob idct32x32 implementations.