/* Copyright (c) 2008-2011 Octasic Inc.
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mlp_train.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <semaphore.h>
#include <pthread.h>
#include <time.h>
#include <signal.h>
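
/* Set by the signal handler so the training loop can exit cleanly on
   SIGTERM/SIGINT/SIGHUP; sig_atomic_t keeps the access async-signal-safe. */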
volatile sig_atomic_t stopped = 0;

void handler(int sig)
{
   stopped = 1;
   signal(sig, handler);
}
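
/* Allocate an MLP and initialize its weights: per-input learning-rate
   scaling derived from the input second moments, small random hidden
   weights scaled by the input variance, and output biases set to the
   mean of the training targets. */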
MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int nbSamples)
{
   int i, j, k;
   MLPTrain *net;
   int inDim, outDim;
   net = malloc(sizeof(*net));
   net->topo = malloc(nbLayers*sizeof(net->topo[0]));
   for (i=0;i<nbLayers;i++)
      net->topo[i] = topo[i];
   inDim = topo[0];
   outDim = topo[nbLayers-1];
   net->in_rate = malloc((inDim+1)*sizeof(net->in_rate[0]));
   net->weights = malloc((nbLayers-1)*sizeof(net->weights[0]));
   net->best_weights = malloc((nbLayers-1)*sizeof(net->best_weights[0]));
   for (i=0;i<nbLayers-1;i++)
   {
      net->weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0]));
      net->best_weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0]));
   }
   double inMean[inDim];
   for (j=0;j<inDim;j++)
   {
      double std=0;
      inMean[j] = 0;
      for (i=0;i<nbSamples;i++)
      {
         inMean[j] += inputs[i*inDim+j];
         std += inputs[i*inDim+j]*inputs[i*inDim+j];
      }
      inMean[j] /= nbSamples;
      std /= nbSamples;
      /* Learning-rate scale for this input, from its (uncentered) second moment. */
      net->in_rate[1+j] = .5/(.0001+std);
      std = std-inMean[j]*inMean[j];
      if (std<.001)
         std = .001;
      std = 1/sqrt(inDim*std);
      for (k=0;k<topo[1];k++)
         net->weights[0][k*(topo[0]+1)+j+1] = randn(std);
   }
   net->in_rate[0] = 1;
   /* Set hidden biases so each hidden unit starts centered on the mean input. */
   for (j=0;j<topo[1];j++)
   {
      double sum = 0;
      for (k=0;k<inDim;k++)
         sum += inMean[k]*net->weights[0][j*(topo[0]+1)+k+1];
      net->weights[0][j*(topo[0]+1)] = -sum;
   }
   /* Output biases start at the target means; hidden->output weights are small. */
   for (j=0;j<outDim;j++)
   {
      double mean = 0;
      double std;
      for (i=0;i<nbSamples;i++)
         mean += outputs[i*outDim+j];
      mean /= nbSamples;
      std = 1/sqrt(topo[nbLayers-2]);
      net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)] = mean;
      for (k=0;k<topo[nbLayers-2];k++)
         net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)+k+1] = randn(std);
   }
   return net;
}
#define MAX_NEURONS 100
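
/* One pass over a batch: forward-propagate each sample through the single
   hidden layer, accumulate the summed squared error and per-output error
   count (samples with |error| > 1), and back-propagate to accumulate the
   weight gradients in W0_grad/W1_grad. Returns the summed squared error. */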
double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate)
{
   int i,j;
   int s;
   int inDim, outDim, hiddenDim;
   int *topo;
   double *W0, *W1;
   double rms=0;
   int W0_size, W1_size;
   double hidden[MAX_NEURONS];
   double netOut[MAX_NEURONS];
   double error[MAX_NEURONS];
   topo = net->topo;
   inDim = net->topo[0];
   hiddenDim = net->topo[1];
   outDim = net->topo[2];
   W0_size = (topo[0]+1)*topo[1];
   W1_size = (topo[1]+1)*topo[2];
   W0 = net->weights[0];
   W1 = net->weights[1];
   memset(W0_grad, 0, W0_size*sizeof(double));
   memset(W1_grad, 0, W1_size*sizeof(double));
   for (i=0;i<outDim;i++)
      netOut[i] = outputs[i];
   for (i=0;i<outDim;i++)
      error_rate[i] = 0;
   for (s=0;s<nbSamples;s++)
   {
      float *in, *out;
      in = inputs+s*inDim;
      out = outputs + s*outDim;
      /* Forward pass: input -> hidden. */
      for (i=0;i<hiddenDim;i++)
      {
         double sum = W0[i*(inDim+1)];
         for (j=0;j<inDim;j++)
            sum += W0[i*(inDim+1)+j+1]*in[j];
         hidden[i] = tansig_approx(sum);
      }
      /* Forward pass: hidden -> output, plus error statistics. */
      for (i=0;i<outDim;i++)
      {
         double sum = W1[i*(hiddenDim+1)];
         for (j=0;j<hiddenDim;j++)
            sum += W1[i*(hiddenDim+1)+j+1]*hidden[j];
         netOut[i] = tansig_approx(sum);
         error[i] = out[i] - netOut[i];
         rms += error[i]*error[i];
         error_rate[i] += fabs(error[i])>1;
         /*error[i] = error[i]/(1+fabs(error[i]));*/
      }
      /* Back-propagate error */
      for (i=0;i<outDim;i++)
      {
         double grad = 1-netOut[i]*netOut[i];
         W1_grad[i*(hiddenDim+1)] += error[i]*grad;
         for (j=0;j<hiddenDim;j++)
            W1_grad[i*(hiddenDim+1)+j+1] += grad*error[i]*hidden[j];
      }
      for (i=0;i<hiddenDim;i++)
      {
         double grad;
         grad = 0;
         for (j=0;j<outDim;j++)
            grad += error[j]*W1[j*(hiddenDim+1)+i+1];
         grad *= 1-hidden[i]*hidden[i];
         W0_grad[i*(inDim+1)] += grad;
         for (j=0;j<inDim;j++)
            W0_grad[i*(inDim+1)+j+1] += grad*in[j];
      }
   }
   return rms;
}
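
/* The gradient computation is parallelized across a fixed pool of worker
   threads: each worker owns one contiguous slice of the data set and a
   begin/end semaphore pair, and the main loop sums the per-thread
   gradients every epoch. */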
#define NB_THREADS 8
sem_t sem_begin[NB_THREADS];
sem_t sem_end[NB_THREADS];
struct GradientArg {
   int id;
   int done;
   MLPTrain *net;
   float *inputs;
   float *outputs;
   int nbSamples;
   double *W0_grad;
   double *W1_grad;
   double rms;
   double error_rate[MAX_OUT];
};
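
/* Worker thread: block on the "begin" semaphore, compute the gradient for
   this thread's slice, then post "end". Exits once the main thread sets
   arg->done and posts "begin" one last time. */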
void *gradient_thread_process(void *_arg)
{
   int W0_size, W1_size;
   struct GradientArg *arg = _arg;
   int *topo = arg->net->topo;
   W0_size = (topo[0]+1)*topo[1];
   W1_size = (topo[1]+1)*topo[2];
   double W0_grad[W0_size];
   double W1_grad[W1_size];
   /* The scratch gradients live on this thread's stack; the main thread
      reads them through arg->W0_grad/W1_grad after sem_end is posted. */
   arg->W0_grad = W0_grad;
   arg->W1_grad = W1_grad;
   while (1)
   {
      sem_wait(&sem_begin[arg->id]);
      if (arg->done)
         break;
      arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, arg->error_rate);
      sem_post(&sem_end[arg->id]);
   }
   fprintf(stderr, "done\n");
   return NULL;
}
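
/* Batch gradient descent with a sign-based adaptive step size per weight,
   in the spirit of Rprop: a rate grows by 1% while its gradient keeps the
   same sign and shrinks by 10% when the sign flips. If no epoch improves
   on the best RMS for 30 epochs in a row, training restarts from the best
   weights seen so far with reduced rates; after 10 such retries it stops. */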
float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSamples, int nbEpoch, float rate)
{
   int i, j;
   int e;
   float best_rms = 1e10;
   int inDim, outDim, hiddenDim;
   int *topo;
   double *W0, *W1, *best_W0, *best_W1;
   double *W0_old, *W1_old;
   double *W0_old2, *W1_old2;
   double *W0_grad, *W1_grad;
   double *W0_oldgrad, *W1_oldgrad;
   double *W0_rate, *W1_rate;
   double *best_W0_rate, *best_W1_rate;
   int W0_size, W1_size;
   topo = net->topo;
   W0_size = (topo[0]+1)*topo[1];
   W1_size = (topo[1]+1)*topo[2];
   struct GradientArg args[NB_THREADS];
   pthread_t thread[NB_THREADS];
   int samplePerPart = nbSamples/NB_THREADS;   /* remainder samples are ignored */
   int count_worse=0;
   int count_retries=0;
   inDim = net->topo[0];
   hiddenDim = net->topo[1];
   outDim = net->topo[2];
   W0 = net->weights[0];
   W1 = net->weights[1];
   best_W0 = net->best_weights[0];
   best_W1 = net->best_weights[1];
   W0_old = malloc(W0_size*sizeof(double));
   W1_old = malloc(W1_size*sizeof(double));
   W0_old2 = malloc(W0_size*sizeof(double));
   W1_old2 = malloc(W1_size*sizeof(double));
   W0_grad = malloc(W0_size*sizeof(double));
   W1_grad = malloc(W1_size*sizeof(double));
   W0_oldgrad = malloc(W0_size*sizeof(double));
   W1_oldgrad = malloc(W1_size*sizeof(double));
   W0_rate = malloc(W0_size*sizeof(double));
   W1_rate = malloc(W1_size*sizeof(double));
   best_W0_rate = malloc(W0_size*sizeof(double));
   best_W1_rate = malloc(W1_size*sizeof(double));
   memcpy(W0_old, W0, W0_size*sizeof(double));
   memcpy(W0_old2, W0, W0_size*sizeof(double));
   memset(W0_grad, 0, W0_size*sizeof(double));
   memset(W0_oldgrad, 0, W0_size*sizeof(double));
   memcpy(W1_old, W1, W1_size*sizeof(double));
   memcpy(W1_old2, W1, W1_size*sizeof(double));
   memset(W1_grad, 0, W1_size*sizeof(double));
   memset(W1_oldgrad, 0, W1_size*sizeof(double));
   rate /= nbSamples;
   /* Initial per-weight rates: input-layer weights are scaled by the
      per-input rates computed in mlp_init(). */
   for (i=0;i<hiddenDim;i++)
      for (j=0;j<inDim+1;j++)
         W0_rate[i*(inDim+1)+j] = rate*net->in_rate[j];
   for (i=0;i<W1_size;i++)
      W1_rate[i] = rate;
   /* Start the worker threads, one contiguous slice of the data each. */
   for (i=0;i<NB_THREADS;i++)
   {
      args[i].net = net;
      args[i].inputs = inputs+i*samplePerPart*inDim;
      args[i].outputs = outputs+i*samplePerPart*outDim;
      args[i].nbSamples = samplePerPart;
      args[i].id = i;
      args[i].done = 0;
      sem_init(&sem_begin[i], 0, 0);
      sem_init(&sem_end[i], 0, 0);
      pthread_create(&thread[i], NULL, gradient_thread_process, &args[i]);
   }
   for (e=0;e<nbEpoch;e++)
   {
      double rms=0;
      /* NOTE: hard-wired for two outputs, like MAX_OUT elsewhere. */
      double error_rate[2] = {0,0};
      for (i=0;i<NB_THREADS;i++)
      {
         sem_post(&sem_begin[i]);
      }
      memset(W0_grad, 0, W0_size*sizeof(double));
      memset(W1_grad, 0, W1_size*sizeof(double));
      for (i=0;i<NB_THREADS;i++)
      {
         sem_wait(&sem_end[i]);
         rms += args[i].rms;
         error_rate[0] += args[i].error_rate[0];
         error_rate[1] += args[i].error_rate[1];
         for (j=0;j<W0_size;j++)
            W0_grad[j] += args[i].W0_grad[j];
         for (j=0;j<W1_size;j++)
            W1_grad[j] += args[i].W1_grad[j];
      }
      float mean_rate = 0, min_rate = 1e10;
      rms = (rms/(outDim*nbSamples));
      error_rate[0] = (error_rate[0]/(nbSamples));
      error_rate[1] = (error_rate[1]/(nbSamples));
fprintf (stderr, "%f %f (%f %f) ", error_rate[0], error_rate[1], rms, best_rms);
if (rms < best_rms)
{
best_rms = rms;
for (i=0;i<W0_size;i++)
{
best_W0[i] = W0[i];
best_W0_rate[i] = W0_rate[i];
}
for (i=0;i<W1_size;i++)
{
best_W1[i] = W1[i];
best_W1_rate[i] = W1_rate[i];
}
count_worse=0;
count_retries=0;
} else {
count_worse++;
if (count_worse>30)
{
count_retries++;
count_worse=0;
for (i=0;i<W0_size;i++)
{
W0[i] = best_W0[i];
best_W0_rate[i] *= .7;
if (best_W0_rate[i]<1e-15) best_W0_rate[i]=1e-15;
W0_rate[i] = best_W0_rate[i];
W0_grad[i] = 0;
}
for (i=0;i<W1_size;i++)
{
W1[i] = best_W1[i];
best_W1_rate[i] *= .8;
if (best_W1_rate[i]<1e-15) best_W1_rate[i]=1e-15;
W1_rate[i] = best_W1_rate[i];
W1_grad[i] = 0;
}
}
}
if (count_retries>10)
break;
for (i=0;i<W0_size;i++)
{
if (W0_oldgrad[i]*W0_grad[i] > 0)
W0_rate[i] *= 1.01;
else if (W0_oldgrad[i]*W0_grad[i] < 0)
W0_rate[i] *= .9;
mean_rate += W0_rate[i];
if (W0_rate[i] < min_rate)
min_rate = W0_rate[i];
if (W0_rate[i] < 1e-15)
W0_rate[i] = 1e-15;
/*if (W0_rate[i] > .01)
W0_rate[i] = .01;*/
W0_oldgrad[i] = W0_grad[i];
W0_old2[i] = W0_old[i];
W0_old[i] = W0[i];
W0[i] += W0_grad[i]*W0_rate[i];
}
for (i=0;i<W1_size;i++)
{
if (W1_oldgrad[i]*W1_grad[i] > 0)
W1_rate[i] *= 1.01;
else if (W1_oldgrad[i]*W1_grad[i] < 0)
W1_rate[i] *= .9;
mean_rate += W1_rate[i];
if (W1_rate[i] < min_rate)
min_rate = W1_rate[i];
if (W1_rate[i] < 1e-15)
W1_rate[i] = 1e-15;
W1_oldgrad[i] = W1_grad[i];
W1_old2[i] = W1_old[i];
W1_old[i] = W1[i];
W1[i] += W1_grad[i]*W1_rate[i];
}
mean_rate /= (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2];
fprintf (stderr, "%g %d", mean_rate, e);
if (count_retries)
fprintf(stderr, " %d", count_retries);
fprintf(stderr, "\n");
if (stopped)
break;
}
for (i=0;i<NB_THREADS;i++)
{
args[i].done = 1;
sem_post(&sem_begin[i]);
pthread_join(thread[i], NULL);
fprintf (stderr, "joined %d\n", i);
}
free(W0_old);
free(W1_old);
free(W0_grad);
free(W1_grad);
free(W0_rate);
free(W1_rate);
return best_rms;
}
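
/* Read whitespace-separated samples from stdin (inputs followed by
   outputs, one sample per row), train the network, and emit the result
   as a C source file on stdout. Example invocation (hypothetical
   feature dump):
      ./mlp_train 25 16 2 1000000 10000 < features.txt > mlp_data.c */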
int main(int argc, char **argv)
{
   int i, j;
   int nbInputs;
   int nbOutputs;
   int nbHidden;
   int nbSamples;
   int nbEpoch;
   int nbRealInputs;
   unsigned int seed;
   int ret;
   float rms;
   float *inputs;
   float *outputs;
   if (argc!=6)
   {
      fprintf (stderr, "usage: mlp_train <inputs> <hidden> <outputs> <nb samples> <nb epoch>\n");
      return 1;
   }
   nbInputs = atoi(argv[1]);
   nbHidden = atoi(argv[2]);
   nbOutputs = atoi(argv[3]);
   nbSamples = atoi(argv[4]);
   nbEpoch = atoi(argv[5]);
   nbRealInputs = nbInputs;
   inputs = malloc(nbInputs*nbSamples*sizeof(*inputs));
   outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs));
   seed = time(NULL);
   /*seed = 1361480659;*/
   fprintf (stderr, "Seed is %u\n", seed);
   srand(seed);
   build_tansig_table();
   signal(SIGTERM, handler);
   signal(SIGINT, handler);
   signal(SIGHUP, handler);
   for (i=0;i<nbSamples;i++)
   {
      for (j=0;j<nbRealInputs;j++)
         ret = scanf(" %f", &inputs[i*nbInputs+j]);
      for (j=0;j<nbOutputs;j++)
         ret = scanf(" %f", &outputs[i*nbOutputs+j]);
      if (feof(stdin))
      {
         nbSamples = i;
         break;
      }
   }
   int topo[3] = {nbInputs, nbHidden, nbOutputs};
   MLPTrain *net;
fprintf (stderr, "Got %d samples\n", nbSamples);
net = mlp_init(topo, 3, inputs, outputs, nbSamples);
rms = mlp_train_backprop(net, inputs, outputs, nbSamples, nbEpoch, 1);
printf ("#include \"mlp.h\"\n\n");
printf ("/* RMS error was %f, seed was %u */\n\n", rms, seed);
printf ("static const float weights[%d] = {\n", (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]);
printf ("\n/* hidden layer */\n");
for (i=0;i<(topo[0]+1)*topo[1];i++)
{
printf ("%gf, ", net->weights[0][i]);
if (i%5==4)
printf("\n");
}
printf ("\n/* output layer */\n");
for (i=0;i<(topo[1]+1)*topo[2];i++)
{
printf ("%g, ", net->weights[1][i]);
if (i%5==4)
printf("\n");
}
printf ("};\n\n");
printf ("static const int topo[3] = {%d, %d, %d};\n\n", topo[0], topo[1], topo[2]);
printf ("const MLP net = {\n");
printf ("\t3,\n");
printf ("\ttopo,\n");
printf ("\tweights\n};\n");
return 0;