UESMANN CPP 1.0
Reference implementation of UESMANN
bpnet.hpp
#ifndef __BPNET_HPP
#define __BPNET_HPP

#include "net.hpp"

/**
 * The "basic" back-propagation network using a logistic sigmoid, as
 * described by Rumelhart, Hinton and Williams (and many others). This
 * class is used by output blending and h-as-input networks.
 */
class BPNet : public Net {
protected:
    /**
     * Special constructor for subclasses which need to manipulate the
     * layer count before initialisation; such subclasses must then call
     * init() themselves.
     */
    BPNet() : Net(NetType::PLAIN) {
        // NB: the initialiser and body here are reconstructed from the
        // surrounding code; NetType::PLAIN is an assumption, matching the
        // main constructor below.
    }

    /**
     * Initialiser for use by the main constructor and by the ctors of
     * those subclasses mentioned in BPNet().
     */
    void init(int nlayers,const int *layerCounts){
        numLayers = nlayers;
        outputs = new double* [numLayers];
        errors = new double* [numLayers];
        layerSizes = new int [numLayers];
        largestLayerSize = 0; // assumed initialisation; required by the max-scan below
        for(int i=0;i<numLayers;i++){
            int n = layerCounts[i];
            outputs[i] = new double[n];
            errors[i] = new double[n];
            for(int k=0;k<n;k++)
                outputs[i][k]=0;
            layerSizes[i]=n;
            if(n>largestLayerSize)
                largestLayerSize=n;
        }

        weights = new double * [numLayers];
        gradAvgsWeights = new double* [numLayers];
        biases = new double* [numLayers];
        gradAvgsBiases = new double* [numLayers];
        for(int i=0;i<numLayers;i++){
            int n = layerCounts[i];
            // every layer gets a full largestLayerSize^2 weight block;
            // see getw() for the indexing scheme
            weights[i] = new double[largestLayerSize*largestLayerSize];
            gradAvgsWeights[i] = new double[largestLayerSize*largestLayerSize];
            biases[i] = new double[n];
            gradAvgsBiases[i] = new double[n];
        }
    }

public:
    /**
     * Constructor. This does not initialise the weights to random values,
     * so that networks can be reinitialised (see initWeights()).
     */
    BPNet(int nlayers,const int *layerCounts) : Net(NetType::PLAIN) {
        init(nlayers,layerCounts);
    }

    /**
     * Set the modulator level for subsequent runs and training of this
     * network. Does nothing here, because this is an unmodulated net.
     */
    virtual void setH(double h){
    }

    /**
     * Get the modulator level.
     */
    virtual double getH() const {
        return 0;
    }

    /**
     * Destructor.
     */
    virtual ~BPNet(){
        for(int i=0;i<numLayers;i++){
            delete [] weights[i];
            delete [] biases[i];
            delete [] gradAvgsWeights[i];
            delete [] gradAvgsBiases[i];
            delete [] outputs[i];
            delete [] errors[i];
        }
        delete [] weights;
        delete [] biases;
        delete [] gradAvgsWeights;
        delete [] gradAvgsBiases;
        delete [] outputs;
        delete [] errors;
        delete [] layerSizes;
    }

    /**
     * Set the inputs to the network before running or training.
     */
    virtual void setInputs(double *d) {
        for(int i=0;i<layerSizes[0];i++){
            outputs[0][i]=d[i];
        }
    }

    /**
     * Used to set inputs manually, typically in HInputNet.
     */
    void setInput(int n, double d){
        outputs[0][n] = d;
    }

    /**
     * Get the outputs after running.
     */
    virtual double *getOutputs() const {
        return outputs[numLayers-1];
    }

    /**
     * Get the number of nodes in a given layer.
     */
    virtual int getLayerSize(int n) const {
        return layerSizes[n];
    }

    /**
     * Get the number of layers.
     */
    virtual int getLayerCount() const {
        return numLayers;
    }

    /**
     * Get the length of the serialised data block for this network.
     */
    virtual int getDataSize() const {
        // Each layer contributes one bias per node, plus one weight for
        // each (node, previous-layer node) pair: c*(1+pc) values in all.
        //
        // NOTE that this uses the true layer size rather than the fake
        // version returned in the subclass HInputNet.
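        // For example, layers {2,3,1} give 2*(1+0) + 3*(1+2) + 1*(1+3) = 15
        // doubles (input-layer biases are stored even though they are unused).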
        int pc=0;
        int total=0;
        for(int i=0;i<numLayers;i++){
            int c = layerSizes[i];
            total += c*(1+pc);
            pc = c;
        }
        return total;
    }

    /**
     * Serialise the data (not including any network type magic number or
     * layer/node counts) to the given memory block, which must be of the
     * size given by getDataSize().
     */
    virtual void save(double *buf) const {
        double *g=buf;
        // Data is ordered by layer, then by node within the layer; each
        // node stores its bias followed by its input weights.
        //
        // NOTE that this uses the true layer size rather than the fake
        // version returned in the subclass HInputNet.
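        // For layers {2,3,1} the order is:
        //   b[0][0], b[0][1],                           (input layer: biases only)
        //   b[1][0], w(1,0,0), w(1,0,1), ... , b[1][2], w(1,2,0), w(1,2,1),
        //   b[2][0], w(2,0,0), w(2,0,1), w(2,0,2)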
        for(int i=0;i<numLayers;i++){
            for(int j=0;j<layerSizes[i];j++){
                *g++ = biases[i][j];
                if(i){
                    for(int k=0;k<layerSizes[i-1];k++){
                        *g++ = getw(i,j,k);
                    }
                }
            }
        }
    }

    /**
     * Load the parameters from a data block of the correct size for the
     * current network (as written by save()).
     */
    virtual void load(double *buf){
        double *g=buf;
        // The genome is ordered by layer, then by node within the layer;
        // each node stores its bias followed by its input weights.
        //
        // NOTE that this uses the true layer size rather than the fake
        // version returned in the subclass HInputNet.
        for(int i=0;i<numLayers;i++){
            for(int j=0;j<layerSizes[i];j++){
                biases[i][j]=*g++;
                if(i){
                    for(int k=0;k<layerSizes[i-1];k++){
                        getw(i,j,k) = *g++;
                    }
                }
            }
        }
    }

protected:
    int numLayers;        //!< number of layers, including input and output
    int *layerSizes;      //!< array of layer sizes
    int largestLayerSize; //!< number of nodes in the largest layer

    /**
     * Array of weights, stored as [tolayer][tonode+largestLayerSize*fromnode];
     * each layer owns a rectangular largestLayerSize^2 block, of which only
     * part may be used.
     */
    double **weights;

    /** Array of biases, stored as a rectangular array of [layer][node]. */
    double **biases;

    // data generated during training and running

    double **outputs; //!< outputs of each layer: one array of doubles for each
    double **errors;  //!< the error for each node, calculated by calcError()

    double **gradAvgsWeights; //!< average gradient for each weight (built during training)
    double **gradAvgsBiases;  //!< average gradient for each bias (built during training)

    /**
     * Initialise weights to random values.
     */
    virtual void initWeights(double initr){
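        // If initr<=0, each layer's range is 1/sqrt(n), where n is the size
        // of the previous layer (the fan-in heuristic from Bishop); otherwise
        // the fixed range [-initr,initr] is used everywhere.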
        for(int i=0;i<numLayers;i++){
            double initrange;
            if(i){
                double ct = layerSizes[i-1];
                if(initr>0)
                    initrange = initr;
                else
                    initrange = 1.0/sqrt(ct); // from Bishop
            } else
                initrange = 0.1; // on the input layer this should mean little
            for(int j=0;j<layerSizes[i];j++)
                biases[i][j]=drand(-initrange,initrange);
            for(int j=0;j<largestLayerSize*largestLayerSize;j++){
                weights[i][j]=drand(-initrange,initrange);
            }
        }
        // zero the input layer weights and biases, which should be unused
        for(int j=0;j<layerSizes[0];j++)
            biases[0][j]=0;
        for(int j=0;j<largestLayerSize*largestLayerSize;j++)
            weights[0][j]=0;
    }

    /**
     * Get a reference to a weight: the weight into neuron `toneuron' in
     * layer `tolayer' from neuron `fromneuron' in the previous layer.
     */
    inline double& getw(int tolayer,int toneuron,int fromneuron) const {
        return weights[tolayer][toneuron+largestLayerSize*fromneuron];
    }
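    // Example: with largestLayerSize=4, the weight into neuron 1 of layer 1
    // from neuron 2 of layer 0 is weights[1][1+4*2] = weights[1][9]; slots
    // outside the two layers' real sizes are allocated but never used.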

    /**
     * Get a reference to a bias.
     */
    inline double& getb(int layer,int neuron) const {
        return biases[layer][neuron];
    }

    /**
     * Get a reference to the average gradient for a given weight (see
     * getw() for the indexing scheme).
     */
    inline double& getavggradw(int tolayer,int toneuron,int fromneuron) const {
        return gradAvgsWeights[tolayer][toneuron+largestLayerSize*fromneuron];
    }

    /**
     * Get the value of a bias gradient.
     */
    inline double getavggradb(int l,int n) const {
        return gradAvgsBiases[l][n];
    }

    /**
     * Run a single example and calculate the errors; used in training.
     */
    void calcError(double *in,double *out){
        // first run the network forwards
        setInputs(in);
        update();

        // then calculate the error (delta) in the output layer
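        // For the logistic sigmoid with a squared-error cost, the output
        // delta is o*(1-o)*(o-t): o*(1-o) is the sigmoid derivative and
        // (o-t) the derivative of the cost with respect to the output.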
        int ol = numLayers-1;
        for(int i=0;i<layerSizes[ol];i++){
            double o = outputs[ol][i];
            errors[ol][i] = o*(1-o)*(o-out[i]);
        }

        // then work out the errors in all the other (hidden) layers
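        // Each hidden delta is built from the deltas of the layer above:
        //   delta[l][j] = (sum_i delta[l+1][i] * w(l+1,i,j)) * o*(1-o)
        // where o = outputs[l][j].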
        for(int l=1;l<numLayers-1;l++){
            for(int j=0;j<layerSizes[l];j++){
                double e = 0;
                for(int i=0;i<layerSizes[l+1];i++)
                    e += errors[l+1][i]*getw(l+1,i,j);

                // produce the \delta^l_j term, where l is the layer and j
                // the index of the node
                errors[l][j] = e * outputs[l][j] * (1-outputs[l][j]);
            }
        }
    }

    /**
     * Run a single update (forward pass) of the network.
     */
    virtual void update(){
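        // Each node j in layer i computes
        //   outputs[i][j] = sigmoid(biases[i][j] + sum_k w(i,j,k)*outputs[i-1][k]),
        // working forward from the first hidden layer to the output layer.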
        for(int i=1;i<numLayers;i++){
            for(int j=0;j<layerSizes[i];j++){
                double v = biases[i][j];
                for(int k=0;k<layerSizes[i-1];k++){
                    v += getw(i,j,k) * outputs[i-1][k];
                }
                outputs[i][j]=sigmoid(v);
            }
        }
    }

    /**
     * Train the network on a batch (or mini-batch, or a single example),
     * returning the mean error over the batch.
     */
    virtual double trainBatch(ExampleSet& ex,int start,int num,double eta){
        // zero the gradient accumulators
        for(int j=0;j<numLayers;j++){
            for(int k=0;k<layerSizes[j];k++)
                gradAvgsBiases[j][k]=0;
            for(int i=0;i<largestLayerSize*largestLayerSize;i++)
                gradAvgsWeights[j][i]=0;
        }

        // reset the total error
        double totalError=0;
        // iterate over the examples
        for(int nn=0;nn<num;nn++){
            int exampleIndex = nn+start;
            // set the modulator
            setH(ex.getH(exampleIndex));
            // get the desired outputs for this example
            double *outs = ex.getOutputs(exampleIndex);
            // run the example and calculate the error at each node
            calcError(ex.getInputs(exampleIndex),outs);

            // accumulate the gradients: dE/dw = delta * input activation
            for(int l=1;l<numLayers;l++){
                for(int i=0;i<layerSizes[l];i++){
                    for(int j=0;j<layerSizes[l-1];j++)
                        getavggradw(l,i,j) += errors[l][i]*outputs[l-1][j];
                    gradAvgsBiases[l][i] += errors[l][i];
                }
            }
            // add this example's squared output error to the total
            int ol = numLayers-1;
            for(int i=0;i<layerSizes[ol];i++){
                double o = outputs[ol][i];
                double e = (o-outs[i]);
                totalError += e*e;
            }
        }

        // for calculating averages: 1/(number of examples trained)
        double factor = 1.0/(double)num;
        // we now have a full set of summed gradients; time to apply them
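        // Gradient-descent step: each weight gets w -= eta * (1/num) * sum of
        // dE/dw over the batch (and likewise for each bias) - plain averaged
        // gradient descent, with no momentum term.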
        for(int l=1;l<numLayers;l++){
            for(int i=0;i<layerSizes[l];i++){
                for(int j=0;j<layerSizes[l-1];j++){
                    double wdelta = eta*getavggradw(l,i,j)*factor;
                    getw(l,i,j) -= wdelta;
                }
                double bdelta = eta*gradAvgsBiases[l][i]*factor;
                biases[l][i] -= bdelta;
            }
        }
        // return the summed squared output error, averaged over the
        // examples in the batch
        return totalError*factor;
    }
};

#endif /* __BPNET_HPP */
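
A minimal usage sketch (not part of the file above; the three-layer shape, buffer handling and main() are illustrative assumptions) showing construction and parameter serialisation through the public API:

    #include "bpnet.hpp"
    #include <vector>

    int main(){
        const int counts[] = {2,3,1};   // hypothetical shape: 2 inputs, 3 hidden, 1 output
        BPNet net(3,counts);            // NB: weights are not randomised by the ctor
        std::vector<double> buf(net.getDataSize());
        net.save(buf.data());           // serialise biases and weights...
        net.load(buf.data());           // ...and load them back
        return 0;
    }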