UESMANN CPP  1.0
Reference implementation of UESMANN
/home/travis/build/jimfinnis/uesmanncpp/data.hpp
Go to the documentation of this file.
1 
7 #ifndef __DATA_H
8 #define __DATA_H
9 
10 #include <assert.h>
11 #include <stdint.h>
12 
13 #include "mnist.hpp"
14 
/**
 * \brief Rearrange an array so that f(item) mod cycle follows the item's
 * index mod cycle, i.e. the values cycle 0,1,..,cycle-1,0,1,..
 *
 * Walks the array from the front. Whenever the item at index i does not have
 * the wanted value, the first later item which does is swapped into place.
 * If no such item exists the function returns immediately, leaving the rest
 * of the array as it is.
 *
 * \param arr    array to rearrange in place
 * \param nitems number of items in arr
 * \param cycle  length of the cycle; a value of zero leaves arr untouched
 * \param f      callable taking an item and returning an integer
 */
template <class T,class TestFunc> void alternate(T *arr,int nitems,int cycle,TestFunc f){
    // x%0 is undefined, so there is nothing sensible to do for an empty cycle
    if(cycle==0)
        return;

    for(int i=0;i<nitems;i++){
        const int wanted = i%cycle; // value this slot should hold
        if(f(arr[i])%cycle==wanted)
            continue; // already correct

        // doesn't match; scan forward for the first item which does.
        int j=i+1;
        while(j<nitems && f(arr[j])%cycle!=wanted)
            j++;
        if(j>=nitems)
            return; // can't find a match, exit.

        // swap the match into place
        T v=arr[i];
        arr[i]=arr[j];
        arr[j]=v;
    }
}
47 
48 
57 class ExampleSet {
58  double **examples; // one pointer per example, each pointing into the data block; this is what shuffle() permutes
59  double *data; // the block of doubles holding every example; a subset shares its parent's block
60 
61  int ninputs; // number of input values in each example
62  int noutputs; // number of output values in each example
63  int ct; // number of examples in this set
64 
65  uint32_t outputOffset; // index within an example at which the outputs start (set to ninputs)
66  uint32_t hOffset; // index within an example of the modulator value (set to ninputs+noutputs)
67 
75  bool ownsData; // true if this set allocated the data block; the subset constructor sets it false
76 
81  int numHLevels; // number of modulator levels; used by shuffle() as block size and as the alternation cycle
82 
86  double minH; // lower end of the modulator range used by shuffle() to turn a modulator into a level index
90  double maxH; // upper end of that range; both can be changed with setHRange()
91 
92 
93 public:
94 
95 
104  ExampleSet(int n,int nin,int nout,int levels){
105  ninputs=nin;
106  noutputs=nout;
107  ct=n;
108  numHLevels = levels;
109  minH=0;
110  maxH=1;
111 
112 // printf("Allocating new set %d*(%d,%d)\n",
113 // n,ninputs,noutputs);
114 
115  // size of a single example: number of inputs plus number of outputs
116  // plus one for the modulator.
117 
118  uint32_t exampleSize = ninputs+noutputs+1;
119 
120  // calculate the offsets
121  outputOffset = ninputs;
122  hOffset = ninputs+noutputs;
123 
124  data = new double[exampleSize*ct]; // allocate data
125  examples = new double*[ct]; // allocate example pointers
126 
127  for(int i=0;i<ct;i++){
128  // work out and store the example pointer
129  examples[i] = data+i*exampleSize;
130 
131  }
132  ownsData = true;
133  }
134 
143  ExampleSet(const ExampleSet &parent,int start,int length){
144  if(length > parent.ct - start || start<0 || length<1)
145  throw std::out_of_range("subset out of range");
146  ownsData = false;
147  ninputs = parent.ninputs;
148  noutputs = parent.noutputs;
149  outputOffset = ninputs;
150  hOffset = ninputs+noutputs;
151  data = parent.data;
152  examples = new double*[length];
153  ct = length;
154  numHLevels = parent.numHLevels;
155  minH = parent.minH;
156  maxH = parent.maxH;
157 
158  for(int i=0;i<ct;i++){
159  examples[i] = parent.examples[start+i];
160  }
161  }
162 
170  ExampleSet(const MNIST& mnist) : ExampleSet(
171  mnist.getCount(), // number of examples
172  mnist.r()*mnist.c(), // input count
173  mnist.getMaxLabel()+1, // output count
174  1 // single modulation level
175  ){
176  // fill in the data
177  for(int i=0;i<ct;i++){
178  // convert each pixel into a 0-1 double and store
179  uint8_t *imgpix = mnist.getImg(i);
180  double *inpix = getInputs(i);
181  for(int i=0;i<ninputs;i++){
182  double pixval = *imgpix++;
183  pixval /= 255.0;
184  *inpix++ = pixval;
185  }
186  // fill in the one-hot encoded output
187  double *out = getOutputs(i);
188  for(int outIdx=0;outIdx<noutputs;outIdx++){
189  out[outIdx] = mnist.getLabel(i)==outIdx?1:0;
190  }
191  setH(i,0); // set nominal modulator value
192  }
193  ownsData=true;
194  }
195 
202  if(ownsData){ // only delete the data if we aren't a subset
203  delete [] data;
204  }
205  }
206 
207 public:
208 
212  enum ShuffleMode {
247  };
248 
249 
259  void shuffle(drand48_data *rd,ShuffleMode mode,int nExamples=0){
260  if(mode == NONE) // this means we don't shuffle
261  return;
262 
263  if(!nExamples)
264  nExamples=ct;
265 
266  int blockSize; // size of the blocks we are shuffling, in bytes
267  if(mode == STRIDE)
268  blockSize = numHLevels;
269  else
270  blockSize = 1;
271  double **tmp = new double*[blockSize]; // temporary storage for swapping
272 
273  for(int i=(nExamples/blockSize)-1;i>=1;i--){
274  long lr;
275  lrand48_r(rd,&lr);
276  int j = lr%(i+1);
277  memcpy(tmp,examples+i*blockSize,blockSize*sizeof(double*));
278  memcpy(examples+i*blockSize,examples+j*blockSize,blockSize*sizeof(double*));
279  memcpy(examples+j*blockSize,tmp,blockSize*sizeof(double*));
280  }
281  // if this mode is set, rearrange the shuffled data so that the h-levels cycle
282  if(mode == ALTERNATE){
283  alternate<double*>(examples, nExamples, numHLevels,
284  // abominations like this are why I used an overcomplicated
285  // example system at first...
286  [this](double *e){
287  double d = (e[hOffset]-minH)/(maxH-minH);
288  int i = (int)(d*(numHLevels-1));
289  return i;
290  });
291  }
292  delete [] tmp;
293  }
294 
300  ExampleSet& setHRange(double mn,double mx){
301  minH = mn;
302  maxH = mx;
303  return *this;
304  }
305 
306 
311  int getInputCount() const {
312  return ninputs;
313  }
314 
319  int getOutputCount() const {
320  return noutputs;
321  }
322 
327  int getCount() const {
328  return ct;
329  }
330 
331 
338  double *getInputs(int example) {
339  assert(example<ct);
340  return examples[example]; // inputs are first in each block
341  }
342 
349  double *getOutputs(int example) {
350  assert(example<ct);
351  return examples[example] + outputOffset;
352  }
353 
359  double getH(int example) const {
360  assert(example<ct);
361  return *(examples[example] + hOffset);
362  }
363 
368  return numHLevels;
369  }
370 
371 
378  void setH(int example, double h){
379  assert(example<ct);
380  *(examples[example] + hOffset) = h;
381  }
382 
388  void dump(int start=0,int end=-1){
389  if(end<0)end=ct;
390  for(int i=start;i<end;i++){
391  double *ins = getInputs(i);
392  double *outs = getOutputs(i);
393  for(int j=0;j<ninputs;j++){
394  printf("%f ",ins[j]);
395  }
396  printf(" modulator %f --> ",getH(i));
397  for(int j=0;j<noutputs;j++){
398  printf("%f ",outs[j]);
399  }
400  printf("\n");
401  }
402  }
403 
404 };
405 
406 
407 
408 #endif /* __DATA_H */
int getNumHLevels()
return the number of different H-levels
Definition: data.hpp:367
int getCount() const
get the number of examples
Definition: data.hpp:327
ExampleSet(int n, int nin, int nout, int levels)
Constructor - creates but doesn't fill in the data.
Definition: data.hpp:104
void setH(int example, double h)
Set the h (modulator) for a given example.
Definition: data.hpp:378
~ExampleSet()
Destructor - deletes data and offset array.
Definition: data.hpp:201
ShuffleMode
Shuffling mode for shuffle()
Definition: data.hpp:212
This class encapsulates and loads data in the standard MNIST format. The data resides in two files...
Definition: mnist.hpp:24
Shuffle single examples, but follow up by running a pass over the examples to ensure that they altern...
Definition: data.hpp:238
void alternate(T *arr, int nitems, int cycle, TestFunc f)
Ensure array has cycling values of some function f mod n. Given an array, this function will rearrang...
Definition: data.hpp:26
ExampleSet & setHRange(double mn, double mx)
Definition: data.hpp:300
double * getOutputs(int example)
Get a pointer to the outputs for a given example, for reading or writing.
Definition: data.hpp:349
void shuffle(drand48_data *rd, ShuffleMode mode, int nExamples=0)
Shuffle the example using a PRNG and a Fisher-Yates shuffle.
Definition: data.hpp:259
Shuffle single examples, no matter the value of numHLevels.
Definition: data.hpp:242
int getInputCount() const
get the number of inputs in all examples
Definition: data.hpp:311
uint8_t getLabel(int n) const
get the label for a given example
Definition: mnist.hpp:158
Don't shuffle examples at all.
Definition: data.hpp:246
Shuffle blocks of numHLevels examples, rather than single examples. This is intended for cases where ...
Definition: data.hpp:228
ExampleSet(const ExampleSet &parent, int start, int length)
Constructor for making a subset of another set. This uses the actual data in the parent, but creates a fresh set of offset structures which can be independently shuffled.
Definition: data.hpp:143
int getOutputCount() const
get the number of outputs in all examples
Definition: data.hpp:319
uint8_t * getImg(int n) const
get the bitmap for a given example
Definition: mnist.hpp:175
Code for converting MNIST data into example sets.
ExampleSet(const MNIST &mnist)
Special constructor for generating a data set from an MNIST database with a single labelling (i...
Definition: data.hpp:170
double getH(int example) const
Get the h (modulator) for a given example.
Definition: data.hpp:359
void dump(int start=0, int end=-1)
dump to stdout
Definition: data.hpp:388
double ins[][2]
possible inputs to boolean functions
Definition: genBoolMap.cpp:32
double * getInputs(int example)
Get a pointer to the inputs for a given example, for reading or writing.
Definition: data.hpp:338
A set of example data. Each datum consists of hormone (i.e. modulator value), inputs and outputs...
Definition: data.hpp:57