vpx_dsp_rtcd_defs.pl 115 KB
Newer Older
Johann's avatar
Johann committed
1
2
3
4
5
6
7
sub vpx_dsp_forward_decls() {
print <<EOF
/*
 * DSP
 */

#include "vpx/vpx_integer.h"
8
#include "vpx_dsp/vpx_dsp_common.h"
Johann's avatar
Johann committed
9
10
11
12
13

EOF
}
forward_decls qw/vpx_dsp_forward_decls/;

14
15
16
17
18
19
# x86inc.asm had specific constraints. break it out so it's easy to disable.
# zero all the variables to avoid tricky else conditions.
$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
  $avx2_x86inc = '';
$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
  $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
Johann's avatar
Johann committed
20
21
22
23
24
25
26
if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
  $mmx_x86inc = 'mmx';
  $sse_x86inc = 'sse';
  $sse2_x86inc = 'sse2';
  $ssse3_x86inc = 'ssse3';
  $avx_x86inc = 'avx';
  $avx2_x86inc = 'avx2';
27
28
29
30
31
32
33
34
  if ($opts{arch} eq "x86_64") {
    $mmx_x86_64_x86inc = 'mmx';
    $sse_x86_64_x86inc = 'sse';
    $sse2_x86_64_x86inc = 'sse2';
    $ssse3_x86_64_x86inc = 'ssse3';
    $avx_x86_64_x86inc = 'avx';
    $avx2_x86_64_x86inc = 'avx2';
  }
Johann's avatar
Johann committed
35
36
}

Zoe Liu's avatar
Zoe Liu committed
37
38
39
40
41
42
# optimizations which depend on multiple features
$avx2_ssse3 = '';
if ((vpx_config("HAVE_AVX2") eq "yes") && (vpx_config("HAVE_SSSE3") eq "yes")) {
  $avx2_ssse3 = 'avx2';
}

43
44
# functions that are 64 bit only.
$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';
Johann's avatar
Johann committed
45
46
47
48
49
50
51
52
if ($opts{arch} eq "x86_64") {
  $mmx_x86_64 = 'mmx';
  $sse2_x86_64 = 'sse2';
  $ssse3_x86_64 = 'ssse3';
  $avx_x86_64 = 'avx';
  $avx2_x86_64 = 'avx2';
}

53
54
55
56
#
# Intra prediction
#

Jingning Han's avatar
Jingning Han committed
57
if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) {
58
59
  add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";
60

61
62
  add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
63

64
65
  add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc";
66

67
68
  add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
69

70
71
  add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d117_predictor_4x4/;
72

73
74
  add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d135_predictor_4x4 neon/;
75

76
77
  add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc";
78

79
80
  add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc";
81

82
83
  add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
84

85
86
  add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
87

88
89
  add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
90

91
92
  add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
93

94
95
  add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
96

97
98
  add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
99

100
101
  add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";
102

103
104
  add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";
105

106
107
  add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
108

109
110
  add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d117_predictor_8x8/;
111

112
113
  add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d135_predictor_8x8/;
114

115
116
  add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc";
117

118
119
  add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc";
120

121
122
  add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
123

124
125
  add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
126

127
128
  add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
129

130
131
  add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
132

133
134
  add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
135

136
137
  add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
138

139
140
  add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";
141

142
143
  add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";
144

145
146
  add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
147

148
149
  add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d117_predictor_16x16/;
150

151
152
  add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d135_predictor_16x16/;
153

154
155
  add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc";
156

157
158
  add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc";
159

160
161
  add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
162

163
164
  add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
165

166
167
  add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
168

169
170
  add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
171

172
173
  add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
174

175
176
  add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";
177

178
179
  add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";
180

181
182
  add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";
183

184
185
  add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
186

187
188
  add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d117_predictor_32x32/;
189

190
191
  add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d135_predictor_32x32/;
192

193
194
  add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc";
195

196
197
  add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
198

199
200
  add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
201

202
203
  add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
204

205
206
  add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
207

208
209
210
211
212
  add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";

  add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
  specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
213
214

# High bitdepth functions
215
216
217
  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d207_predictor_4x4/;
218

219
220
    add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d45_predictor_4x4/;
221

222
223
    add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d63_predictor_4x4/;
224

225
226
    add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_h_predictor_4x4/;
227

228
229
    add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d117_predictor_4x4/;
230

231
232
    add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d135_predictor_4x4/;
233

234
235
    add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d153_predictor_4x4/;
236

237
238
    add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc";
239

240
241
    add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
242

243
244
    add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
245

246
247
    add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_top_predictor_4x4/;
248

249
250
    add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_left_predictor_4x4/;
251

252
253
    add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_128_predictor_4x4/;
254

255
256
    add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d207_predictor_8x8/;
257

258
259
    add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d45_predictor_8x8/;
260

261
262
    add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d63_predictor_8x8/;
263

264
265
    add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_h_predictor_8x8/;
266

267
268
    add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d117_predictor_8x8/;
269

270
271
    add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d135_predictor_8x8/;
272

273
274
    add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d153_predictor_8x8/;
275

276
277
    add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc";
278

279
280
    add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc";
281

282
283
    add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";;
284

285
286
    add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_top_predictor_8x8/;
287

288
289
    add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_left_predictor_8x8/;
290

291
292
    add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_128_predictor_8x8/;
293

294
295
    add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d207_predictor_16x16/;
296

297
298
    add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d45_predictor_16x16/;
299

300
301
    add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d63_predictor_16x16/;
302

303
304
    add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_h_predictor_16x16/;
305

306
307
    add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d117_predictor_16x16/;
308

309
310
    add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d135_predictor_16x16/;
311

312
313
    add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d153_predictor_16x16/;
314

315
316
    add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";
317

318
319
    add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc";
320

321
322
    add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";
323

324
325
    add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_top_predictor_16x16/;
326

327
328
    add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_left_predictor_16x16/;
329

330
331
    add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_128_predictor_16x16/;
332

333
334
    add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d207_predictor_32x32/;
335

336
337
    add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d45_predictor_32x32/;
338

339
340
    add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d63_predictor_32x32/;
341

342
343
    add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_h_predictor_32x32/;
344

345
346
    add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d117_predictor_32x32/;
347

348
349
    add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d135_predictor_32x32/;
350

351
352
    add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_d153_predictor_32x32/;
353

354
355
    add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";
356

357
358
    add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
359

360
361
    add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc";
362

363
364
    add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_top_predictor_32x32/;
365

366
367
    add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_left_predictor_32x32/;
368

369
370
371
    add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
    specialize qw/vpx_highbd_dc_128_predictor_32x32/;
  }  # CONFIG_VP9_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
372
}  # CONFIG_VP9 || CONFIG_VP10
373

Zoe Liu's avatar
Zoe Liu committed
374
375
376
377
#
# Sub Pixel Filters
#
add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Zoe Liu's avatar
Zoe Liu committed
378
specialize qw/vpx_convolve_copy neon dspr2 msa/, "$sse2_x86inc";
Zoe Liu's avatar
Zoe Liu committed
379
380

add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Zoe Liu's avatar
Zoe Liu committed
381
specialize qw/vpx_convolve_avg neon dspr2 msa/, "$sse2_x86inc";
Zoe Liu's avatar
Zoe Liu committed
382
383

add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Zoe Liu's avatar
Zoe Liu committed
384
specialize qw/vpx_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
Zoe Liu's avatar
Zoe Liu committed
385
386

add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Zoe Liu's avatar
Zoe Liu committed
387
specialize qw/vpx_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
Zoe Liu's avatar
Zoe Liu committed
388
389

add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Zoe Liu's avatar
Zoe Liu committed
390
specialize qw/vpx_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
Zoe Liu's avatar
Zoe Liu committed
391
392

add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Zoe Liu's avatar
Zoe Liu committed
393
specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa/;
Zoe Liu's avatar
Zoe Liu committed
394
395

add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Zoe Liu's avatar
Zoe Liu committed
396
specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/;
Zoe Liu's avatar
Zoe Liu committed
397
398

add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Zoe Liu's avatar
Zoe Liu committed
399
specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/;
Zoe Liu's avatar
Zoe Liu committed
400

401
add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
Scott LaVarnway's avatar
Scott LaVarnway committed
402
specialize qw/vpx_scaled_2d ssse3/;
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418

add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_horiz/;

add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_vert/;

add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_avg_2d/;

add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_avg_horiz/;

add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_avg_vert/;

Zoe Liu's avatar
Zoe Liu committed
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  #
  # Sub Pixel Filters
  #
  add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve_copy/;

  add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve_avg/;

  add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_horiz/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_vert/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_avg/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_avg_horiz/, "$sse2_x86_64";

  add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
  specialize qw/vpx_highbd_convolve8_avg_vert/, "$sse2_x86_64";
}  # CONFIG_VP9_HIGHBITDEPTH

448
449
450
#
# Loopfilter
#
451
add_proto qw/void vpx_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
452
specialize qw/vpx_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
453
$vpx_lpf_vertical_16_neon_asm=vpx_lpf_vertical_16_neon;
454

455
add_proto qw/void vpx_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
456
specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
457
$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
458

459
add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
460
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
461

462
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
463
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
464
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
465

466
add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
467
specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
468

469
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
470
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
471

472
add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
473
specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
474
$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
475

476
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
477
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
478

479
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
480
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
481
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
482

483
add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
484
specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;
485

486
add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
487
specialize qw/vpx_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
488
489

if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
490
491
  add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_vertical_16 sse2/;
492

493
494
  add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
495

496
497
  add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
498

499
500
  add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
501

502
503
  add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
504

505
506
  add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
507

508
509
  add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
510

511
512
  add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
513

514
515
  add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
516

517
518
  add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
519

520
521
  add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2/;
522
523
}  # CONFIG_VP9_HIGHBITDEPTH

524
525
526
#
# Encoder functions.
#
527
528
529
530

#
# Forward transform
#
Jingning Han's avatar
Jingning Han committed
531
if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
532
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
533
534
  add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct4x4 sse2/;
535

536
537
  add_proto qw/void vpx_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct4x4_1 sse2/;
538

539
540
  add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct8x8 sse2/;
541

542
543
  add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct8x8_1 sse2/;
544

545
546
  add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct16x16 sse2/;
547

548
549
  add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct16x16_1 sse2/;
550

551
552
  add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32 sse2/;
553

554
555
  add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32_rd sse2/;
556

557
558
  add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32_1 sse2/;
559

560
561
  add_proto qw/void vpx_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct4x4 sse2/;
562

563
564
  add_proto qw/void vpx_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct8x8 sse2/;
565

566
567
  add_proto qw/void vpx_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct8x8_1/;
568

569
570
  add_proto qw/void vpx_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct16x16 sse2/;
571

572
573
  add_proto qw/void vpx_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct16x16_1/;
574

575
576
  add_proto qw/void vpx_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct32x32 sse2/;
577

578
579
  add_proto qw/void vpx_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct32x32_rd sse2/;
580

581
582
  add_proto qw/void vpx_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_highbd_fdct32x32_1/;
583
} else {
584
585
  add_proto qw/void vpx_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct4x4 sse2 msa/;
586

587
588
  add_proto qw/void vpx_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct4x4_1 sse2/;
589

590
591
  add_proto qw/void vpx_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc";
592

593
594
  add_proto qw/void vpx_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct8x8_1 sse2 neon msa/;
595

596
597
  add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct16x16 sse2 msa/;
598

599
600
  add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct16x16_1 sse2 msa/;
601

602
603
  add_proto qw/void vpx_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32 sse2 avx2 msa/;
604

605
606
  add_proto qw/void vpx_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32_rd sse2 avx2 msa/;
607

608
609
  add_proto qw/void vpx_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vpx_fdct32x32_1 sse2 msa/;
610
}  # CONFIG_VP9_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
611
}  # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
612

613
614
#
# Inverse transform
Jingning Han's avatar
Jingning Han committed
615
if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) {
616
617
618
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # Note as optimized versions of these functions are added we need to add a check to ensure
  # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
619
620
  add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct4x4_1_add/;
621

622
623
  add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct4x4_16_add/;
624

625
626
  add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct8x8_1_add/;
627

628
629
  add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct8x8_64_add/;
630

631
632
  add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct8x8_12_add/;
633

634
635
  add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct16x16_1_add/;
636

637
638
  add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct16x16_256_add/;
639

640
641
  add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct16x16_10_add/;
642

643
644
  add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct32x32_1024_add/;
645

646
647
  add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct32x32_34_add/;
648

649
650
  add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_idct32x32_1_add/;
651

652
653
  add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_iwht4x4_1_add/;
654

655
656
  add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
  specialize qw/vpx_iwht4x4_16_add/;
657

658
659
  add_proto qw/void vpx_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct4x4_1_add/;
660

661
662
  add_proto qw/void vpx_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct8x8_1_add/;
663

664
665
  add_proto qw/void vpx_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct16x16_1_add/;
666

667
668
  add_proto qw/void vpx_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct32x32_1024_add/;
669

670
671
  add_proto qw/void vpx_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct32x32_34_add/;
672

673
674
  add_proto qw/void vpx_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_idct32x32_1_add/;
675

676
677
  add_proto qw/void vpx_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_iwht4x4_1_add/;
678

679
680
  add_proto qw/void vpx_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
  specialize qw/vpx_highbd_iwht4x4_16_add/;
681
682
683

  # Force C versions if CONFIG_EMULATE_HARDWARE is 1
  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
684
685
    add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct4x4_16_add/;
686

687
688
    add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct8x8_64_add/;
689

690
691
    add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct8x8_10_add/;
692

693
694
    add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct16x16_256_add/;
695

696
697
    add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct16x16_10_add/;
698
  } else {
699
700
    add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct4x4_16_add sse2/;
701

702
703
    add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct8x8_64_add sse2/;
704

705
706
    add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct8x8_10_add sse2/;
707

708
709
    add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct16x16_256_add sse2/;
710

711
712
    add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
    specialize qw/vpx_highbd_idct16x16_10_add sse2/;
713
714
715
716
  }  # CONFIG_EMULATE_HARDWARE
} else {
  # Force C versions if CONFIG_EMULATE_HARDWARE is 1
  if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
717
718
    add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct4x4_1_add/;
719

720
721
    add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct4x4_16_add/;
722

723
724
    add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_1_add/;
725

726
727
    add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_64_add/;
728

729
730
    add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_12_add/;
731

732
733
    add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_1_add/;
734

735
736
    add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_256_add/;
737

738
739
    add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_10_add/;
740

741
742
    add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_1024_add/;
743

744
745
    add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_34_add/;
746

747
748
    add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_1_add/;
749

750
751
    add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_iwht4x4_1_add/;
752

753
754
    add_proto qw/void vpx_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_iwht4x4_16_add/;
755
  } else {
756
757
    add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct4x4_1_add sse2 neon dspr2 msa/;
758

759
760
    add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct4x4_16_add sse2 neon dspr2 msa/;
761

762
763
    add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_1_add sse2 neon dspr2 msa/;
764

765
766
    add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
767

768
769
    add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
770

771
772
    add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_1_add sse2 neon dspr2 msa/;
773

774
775
    add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_256_add sse2 neon dspr2 msa/;
776

777
778
    add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct16x16_10_add sse2 neon dspr2 msa/;
779

780
781
    add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_1024_add sse2 neon dspr2 msa/;
782

783
784
    add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
    specialize qw/vpx_idct32x32_34_add sse2 neon_asm dspr2 msa/;
785
    # Need to add 34 eob idct32x32 neon implementation.