ConvNet  1.0
A GPU-based C++ implementation of Convolutional Neural Nets
 All Classes Namespaces Functions Variables
datahandler.h
1 #ifndef DATAHANDLER_H_
2 #define DATAHANDLER_H_
3 #include "layer.h"
4 #include "image_iterators.h"
5 #include <random>
6 #include <thread>
7 class DataIterator;
8 
// Makes data accessible to the model.
// Only declarations appear in this listing; every member function except the
// two inline getters is defined elsewhere.
14 class DataHandler {
15  public:
16  DataHandler(const config::DatasetConfig& config);  // Builds the handler from a dataset configuration.
17  virtual ~DataHandler();  // Virtual, so the class may be destroyed through a base pointer.
18 
19  void GetBatch(vector<Layer*>& data_layers);  // NOTE(review): presumably fills the given data layers with the next batch - confirm in the definition.
20  int GetBatchSize() const { return batch_size_; }  // Returns batch_size_.
21  int GetDataSetSize() const { return dataset_size_; }  // Returns dataset_size_.
22  void Seek(int row);  // NOTE(review): presumably repositions reading at `row` - confirm.
23  void Preprocess(Matrix& input, Matrix& output);  // Takes separate input and output matrices; the transformation is not visible here.
24  void Sync();
25 
26  protected:
     // Shuffling helpers (definitions not visible in this listing).
27  void SetupShuffler();
28  void ShuffleIndices();
     // Two overloads; the second additionally receives a vector of row indices.
29  void LoadChunk(DataIterator& it, Matrix& mat);
30  void LoadChunk(DataIterator& it, Matrix& mat, vector<int>& random_rows);
31 
     // Disk access / preloading helpers.
     // NOTE(review): likely run on preload_thread_ - verify against the definitions.
32  void PipelinedDiskAccess();
33  void DiskAccess();
34  void StartPreload();
35  void WaitForPreload();
36 
37  default_random_engine generator_;
38  uniform_int_distribution<int> * distribution_;  // Raw pointer; ownership is not visible here - TODO confirm who deletes it.
39  map<string, DataIterator*> data_it_;  // String-keyed iterators. NOTE(review): keys look like layer names - confirm.
40  map<string, Matrix> data_;  // String-keyed matrices.
41  vector<string> layer_names_;
42  thread* preload_thread_;  // Raw pointer; ownership/join policy is not visible here - TODO confirm.
43  Matrix rand_perm_indices_;
44  int batch_size_, chunk_size_, max_reuse_count_, reuse_counter_,
45  random_access_chunk_size_, dataset_size_, start_;
46  bool restart_, nothing_on_gpu_, fits_on_gpu_;
47  const bool pipeline_loads_, randomize_cpu_, randomize_gpu_;  // const: can only be set in the constructor's initializer list.
48 };
49 
// Base class for implementing data iterators.
// Abstract: GetNext() is pure virtual, so only derived iterators can be
// instantiated.
55 class DataIterator {
56  public:
57  DataIterator(const config::DataStreamConfig& config);  // Builds the iterator from a data-stream configuration.
58  virtual ~DataIterator() {};  // Empty virtual destructor so derived iterators can be deleted through a DataIterator*.
59  virtual void GetNext(float* data_out) = 0;  // Pure virtual; writes through data_out. NOTE(review): required buffer size is not visible here - presumably GetDims() floats, confirm.
60  virtual void Seek(int row);  // Overridable; NOTE(review): presumably repositions the iterator at `row` - confirm.
61  void Preprocess(Matrix& m);
62  void AddNoise(Matrix& input, Matrix& output);
63  int GetDims() const;
64  int GetDataSetSize() const;
65  void AddPCANoise(Matrix& m);
66  void SetJitterVariables(int max_offset);
67  void Jitter(Matrix& source, Matrix& dest);
68  static DataIterator* ChooseDataIterator(const config::DataStreamConfig& config);  // Static; returns a DataIterator* for the given config. Ownership of the result is not visible here - TODO confirm.
69 
70  protected:
71  void LoadMeans(const string& data_file);  // Takes a file path/name; what is loaded is defined elsewhere.
72 
73  int num_dims_, dataset_size_, row_;
74  Matrix mean_, std_, pca_noise1_, pca_noise2_, eig_values_, eig_vectors_,
75  width_offset_, height_offset_, flip_bit_;
     // Everything below is const and therefore fixed at construction time.
76  const string file_pattern_;
77  const int num_colors_, gpu_id_;
78  const bool translate_, flip_, normalize_, pixelwise_normalize_, add_pca_noise_;
79  const float pca_noise_stddev_;
80 };
81 
86  public:
87  DummyDataIterator(const config::DataStreamConfig& config);
88  void GetNext(float* data_out);
89 };
90 
93 template <typename T>
95  public:
96  HDF5DataIterator(const config::DataStreamConfig& config);
98  void GetNext(float* data_out);
99  void GetNext(float* data_out, const int row);
100 
101  protected:
102  hid_t file_, dataset_, dapl_id_, m_dataspace_, type_;
103  hsize_t start_[2], count_[2];
104  T* buf_;
105 };
106 
109  public:
110  ImageDataIterator(const config::DataStreamConfig& config);
112  virtual void GetNext(float* data_out);
113  virtual void Seek(int row);
114 
115  protected:
117  unsigned char* buf_;
118  const int raw_image_size_, image_size_;
119 };
120 
123  public:
124  SlidingWindowDataIterator(const config::DataStreamConfig& config);
126  virtual void GetNext(float* data_out);
127  virtual void Seek(int row);
128 
129  protected:
131  unsigned char* buf_;
132  vector<string> file_names_;
133  const int stride_, raw_image_size_, image_size_;
134  int file_id_;
135 };
136 
142  public:
143  TextDataIterator(const config::DataStreamConfig& config);
144  ~TextDataIterator();
145  virtual void GetNext(float* data_out);
146 
147  protected:
148  float* data_;
149 };
150 
// Writes data into an HDF5 file.
// AddStream() and Write() are virtual, so this class is designed to be used
// as a polymorphic base.
154 class DataWriter {
155  public:
     // output_file: path of the file to write; dataset_size: stored in dataset_size_.
156  DataWriter(const string& output_file, const int dataset_size);
     // Virtual because the class exposes virtual functions: without it, deleting
     // a derived writer through a DataWriter* is undefined behaviour.
157  virtual ~DataWriter();
     // Registers an output stream called `name` with `numdims` dimensions.
     // NOTE(review): presumably the returned-by-order stream index is what Write() expects as data_id - confirm.
158  virtual void AddStream(const string& name, const int numdims);
     // Writes `rows` rows of `mat` to the stream identified by `data_id`.
     // NOTE(review): exact row/offset semantics are defined elsewhere - confirm.
159  virtual void Write(Matrix& mat, const int data_id, const int rows);
160 
161  private:
162  const string output_file_;
163  const int dataset_size_;
164  vector<int> numdims_;
165  vector<hid_t> dataset_handle_, dataspace_handle_;  // HDF5 handles, one entry per vector element.
166  vector<int> current_row_;
167  hid_t file_;
168  int num_streams_;
169 };
170 
175  public:
176  AveragedDataWriter(const string& output_file, const int dataset_size,
177  const int avg_after, int max_batchsize);
179  virtual void AddStream(const string& name, const int numdims);
180  virtual void Write(Matrix& mat, const int data_id, const int rows);
181  private:
182  const int avg_after_, max_batchsize_;
183  vector<Matrix*> buf_;
184  vector<int> counter_;
185 };
186 
191  public:
192  SequentialAveragedDataWriter(const string& output_file, const int dataset_size,
193  const int avg_after);
195  virtual void AddStream(const string& name, const int numdims);
196  virtual void Write(Matrix& mat, const int data_id, const int rows);
197 
198  private:
199  const int avg_after_, dataset_size_;
200  vector<Matrix*> buf_;
201  int consumed_, num_rows_written_;
202 };
203 
204 #endif
Averages a specified number of consecutive entries and writes the average into an HDF5 file...
Definition: datahandler.h:190
Base class for implementing data iterators.
Definition: datahandler.h:55
An iterator over sliding windows of an image dataset.
Definition: datahandler.h:122
Writes data into an HDF5 file.
Definition: datahandler.h:154
A dummy iterator.
Definition: datahandler.h:85
An iterator over a dataset in an HDF5 file.
Definition: datahandler.h:94
A GPU matrix class.
Definition: matrix.h:11
An iterator over images stored as individual files.
Definition: datahandler.h:108
Buffers a specified number of batches, averages them and then writes the average into an HDF5 file...
Definition: datahandler.h:174
Makes data accessible to the model.
Definition: datahandler.h:14
An iterator over data stored in a text file.
Definition: datahandler.h:141