ConvNet 1.0
A GPU-based C++ implementation of Convolutional Neural Nets
layer.h
#ifndef LAYER_H_
#define LAYER_H_
#include "edge.h"
#include <set>

/** The base class for all layers. */
class Layer {
 public:
  /** Instantiate a layer from config. */
  Layer(const config::Layer& config);
  ~Layer();

  /** Allocate memory for storing the state and derivative at this layer. */
  virtual void AllocateMemory(int imgsize, int batch_size);

  /** Apply the activation function. */
  virtual void ApplyActivation(bool train) = 0;

  /** Apply the derivative of the activation. */
  virtual void ApplyDerivativeOfActivation() = 0;

  /** Compute derivative of loss function. */
  virtual void ComputeDeriv() = 0;

  /** Compute the value of the loss function that is displayed during training. */
  virtual float GetLoss() = 0;

  /** Compute the value of the actual loss function. */
  virtual float GetLoss2();

  /** Apply dropout to this layer. */
  void ApplyDropout(bool train);

  /** Apply derivative of dropout. */
  void ApplyDerivativeofDropout();

  // Methods for preventing race conditions when using multiple GPUs.
  void AccessStateBegin();
  void AccessStateEnd();
  void AccessDerivBegin();
  void AccessDerivEnd();

  /** Returns the incoming edge by index. */
  Edge* GetIncomingEdge(int index) { return incoming_edge_[index]; }  // TODO: add check for size.

  /** Returns a reference to the state of the layer. */
  Matrix& GetState() { return state_; }

  /** Returns a reference to the deriv at this layer. */
  Matrix& GetDeriv() { return deriv_; }

  /** Returns a reference to the data at this layer. */
  Matrix& GetData() { return data_; }

  void Display();
  void Display(int image_id);

  /** Add an incoming edge to this layer. */
  void AddIncoming(Edge* e);

  /** Add an outgoing edge from this layer. */
  void AddOutgoing(Edge* e);

  const string& GetName() const { return name_; }
  int GetNumChannels() const { return num_channels_; }
  int GetSize() const { return image_size_; }
  bool IsInput() const { return is_input_; }
  bool IsOutput() const { return is_output_; }

  int GetGPUId() const { return gpu_id_; }
  void AllocateMemoryOnOtherGPUs();
  Matrix& GetOtherState(int gpu_id);
  Matrix& GetOtherDeriv(int gpu_id);
  void SyncIncomingState();
  void SyncOutgoingState();
  void SyncIncomingDeriv();
  void SyncOutgoingDeriv();

  static Layer* ChooseLayerClass(const config::Layer& layer_config);

  vector<Edge*> incoming_edge_, outgoing_edge_;
  bool has_incoming_from_same_gpu_, has_outgoing_to_same_gpu_;
  bool has_incoming_from_other_gpus_, has_outgoing_to_other_gpus_;

 protected:
  void ApplyDropoutAtTrainTime();
  void ApplyDropoutAtTestTime();

  const string name_;
  const int num_channels_;
  const bool is_input_, is_output_;
  const float dropprob_;
  const bool display_, dropout_scale_up_at_train_time_, gaussian_dropout_;

  // Maximum activation after applying gaussian dropout.
  // This is needed to prevent blow ups due to sampling large values.
  const float max_act_gaussian_dropout_;

  int scale_targets_, image_size_;

  Matrix state_;          // State (activation) of the layer.
  Matrix deriv_;          // Deriv of the loss function w.r.t. the state of this layer.
  Matrix data_;           // Data (targets) associated with this layer.
  Matrix rand_gaussian_;  // Random variates needed when doing gaussian dropout.
  map<int, Matrix> other_states_;  // Copies of this layer's state on other gpus.
  map<int, Matrix> other_derivs_;  // Copies of this layer's deriv on other gpus.
  ImageDisplayer* img_display_;
  const int gpu_id_;
  set<int> other_incoming_gpu_ids_, other_outgoing_gpu_ids_;
};

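The dropout machinery above is controlled by dropprob_, dropout_scale_up_at_train_time_, gaussian_dropout_ and max_act_gaussian_dropout_. As a rough, CPU-only sketch of the two conventions those flags suggest (scaling kept units up at train time versus scaling activations down at test time, and multiplicative gaussian noise clipped at max_act_gaussian_dropout_), and not the library's GPU implementation:

// Conceptual sketch only; the real ApplyDropoutAtTrainTime/AtTestTime work on
// the GPU Matrix class, not raw arrays, and their exact behavior is not shown
// in this header.
#include <algorithm>
#include <cstddef>
#include <random>

void DropoutTrainSketch(float* state, std::size_t n, float dropprob,
                        bool scale_up_at_train_time, std::mt19937& rng) {
  std::bernoulli_distribution keep(1.0f - dropprob);
  const float scale = scale_up_at_train_time ? 1.0f / (1.0f - dropprob) : 1.0f;
  for (std::size_t i = 0; i < n; ++i)
    state[i] = keep(rng) ? state[i] * scale : 0.0f;  // drop or (optionally) scale up
}

void DropoutTestSketch(float* state, std::size_t n, float dropprob,
                       bool scale_up_at_train_time) {
  if (scale_up_at_train_time) return;  // already unbiased at train time
  for (std::size_t i = 0; i < n; ++i)
    state[i] *= (1.0f - dropprob);     // otherwise scale down at test time
}

// Gaussian dropout: multiply by N(1, sigma^2) noise instead of a 0/1 mask, and
// clip the result to avoid blow ups from sampling large values, which is the
// role the header assigns to max_act_gaussian_dropout_.
void GaussianDropoutTrainSketch(float* state, std::size_t n, float sigma,
                                float max_act, std::mt19937& rng) {
  std::normal_distribution<float> noise(1.0f, sigma);
  for (std::size_t i = 0; i < n; ++i)
    state[i] = std::min(max_act, state[i] * noise(rng));
}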
/** Implements a layer with a linear activation function. */
class LinearLayer : public Layer {
 public:
  LinearLayer(const config::Layer& config) : Layer(config) {}
  virtual void AllocateMemory(int imgsize, int batch_size);
  virtual void ApplyActivation(bool train);
  virtual void ApplyDerivativeOfActivation();
  virtual void ComputeDeriv();
  virtual float GetLoss();
};

/** Implements a layer with a rectified linear activation function. */
class ReLULayer : public LinearLayer {
 public:
  ReLULayer(const config::Layer& config);
  virtual void ApplyActivation(bool train);
  virtual void ApplyDerivativeOfActivation();
 protected:
  const bool rectify_after_gaussian_dropout_;
};

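ReLULayer only overrides the activation and its derivative. Elementwise, the forward pass is y = max(0, x) and the backward pass zeroes the derivative wherever the unit was not active. A plain-CPU reference of that math (the real code operates on the GPU Matrix class):

#include <algorithm>
#include <cstddef>

// y = max(0, x), applied in place to the layer's state.
void ReluForwardSketch(float* state, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    state[i] = std::max(0.0f, state[i]);
}

// dL/dx = dL/dy where the unit was active, 0 elsewhere.
void ReluBackwardSketch(const float* state, float* deriv, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    if (state[i] <= 0.0f) deriv[i] = 0.0f;
}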
/** Implements a layer with a softmax activation function. */
class SoftmaxLayer : public Layer {
 public:
  SoftmaxLayer(const config::Layer& config) : Layer(config) {}
  virtual void AllocateMemory(int imgsize, int batch_size);
  virtual void ApplyActivation(bool train);
  virtual void ApplyDerivativeOfActivation();
  virtual void ComputeDeriv();
  virtual float GetLoss();
  virtual float GetLoss2();
};

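SoftmaxLayer normalizes each output vector with a softmax. The header does not spell out the loss, but under the usual softmax-plus-cross-entropy pairing the derivative with respect to the pre-activations reduces to prediction minus target, which is presumably what ComputeDeriv produces. A per-example CPU reference, written under that assumption:

#include <algorithm>
#include <cmath>
#include <cstddef>

// p_i = exp(x_i - max_j x_j) / sum_k exp(x_k - max_j x_j), in place.
void SoftmaxForwardSketch(float* state, std::size_t k) {
  float max_v = state[0];
  for (std::size_t i = 1; i < k; ++i) max_v = std::max(max_v, state[i]);
  float sum = 0.0f;
  for (std::size_t i = 0; i < k; ++i) {
    state[i] = std::exp(state[i] - max_v);
    sum += state[i];
  }
  for (std::size_t i = 0; i < k; ++i) state[i] /= sum;
}

// With cross-entropy loss, dL/dx_i = p_i - t_i.
void SoftmaxComputeDerivSketch(const float* prob, const float* target,
                               float* deriv, std::size_t k) {
  for (std::size_t i = 0; i < k; ++i) deriv[i] = prob[i] - target[i];
}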
/** Softmax layer variant whose targets are distributions (see cross_entropy_). */
class SoftmaxDistLayer : public SoftmaxLayer {
 public:
  SoftmaxDistLayer(const config::Layer& config) : SoftmaxLayer(config) {}
  virtual void AllocateMemory(int imgsize, int batch_size);
  virtual void ComputeDeriv();
  virtual float GetLoss();

 private:
  Matrix cross_entropy_;
};

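The cross_entropy_ member and the class name suggest this variant scores a full target distribution rather than a one-hot label: the cross-entropy of a predicted distribution p against targets t is H(t, p) = -sum_i t_i log p_i. A minimal CPU sketch, assuming that interpretation:

#include <cmath>
#include <cstddef>

// H(t, p) = -sum_i t_i * log(p_i), for one example of dimension k.
float CrossEntropyDistSketch(const float* target, const float* prob,
                             std::size_t k) {
  float loss = 0.0f;
  for (std::size_t i = 0; i < k; ++i)
    loss -= target[i] * std::log(prob[i] + 1e-10f);  // epsilon guards log(0)
  return loss;
}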
/** Implements a layer with a logistic activation function. */
class LogisticLayer : public Layer {
 public:
  LogisticLayer(const config::Layer& config) : Layer(config) {}
  virtual void AllocateMemory(int image_size, int batch_size);
  virtual void ApplyActivation(bool train);
  virtual void ApplyDerivativeOfActivation();
  virtual void ComputeDeriv();
  virtual float GetLoss();
};

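LogisticLayer applies the sigmoid elementwise: y = 1 / (1 + exp(-x)), and its ApplyDerivativeOfActivation presumably multiplies the incoming derivative by y(1 - y). A CPU reference of that math (again, not the library's GPU code):

#include <cmath>
#include <cstddef>

// y = 1 / (1 + exp(-x)), applied in place to the layer's state.
void LogisticForwardSketch(float* state, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    state[i] = 1.0f / (1.0f + std::exp(-state[i]));
}

// Chain rule through the sigmoid: dL/dx = dL/dy * y * (1 - y).
void LogisticBackwardSketch(const float* state, float* deriv, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    deriv[i] *= state[i] * (1.0f - state[i]);
}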
#endif
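To close, a hedged sketch of how a caller might use the public API declared in this header to build a tiny two-layer graph and run one forward step. ForwardSketch, the ready-made Edge*, the config protos and the sizes are all placeholders introduced for illustration; how the edge actually propagates state between layers is the Edge class's job and is elided.

#include "layer.h"
#include "edge.h"

void ForwardSketch(const config::Layer& input_cfg,
                   const config::Layer& output_cfg,
                   Edge* e, int image_size, int batch_size) {
  // The factory picks the concrete subclass (LinearLayer, ReLULayer,
  // SoftmaxLayer, ...) based on the config.
  Layer* input  = Layer::ChooseLayerClass(input_cfg);
  Layer* output = Layer::ChooseLayerClass(output_cfg);

  input->AddOutgoing(e);   // the edge leaves the input layer
  output->AddIncoming(e);  // and enters the output layer

  input->AllocateMemory(image_size, batch_size);
  output->AllocateMemory(image_size, batch_size);

  // ... fill input->GetState() with a mini-batch and let the edge compute
  // output's pre-activations from it ...
  output->ApplyActivation(/*train=*/true);
  output->ApplyDropout(/*train=*/true);
  // Ownership / cleanup of the Layer objects is left to the caller.
}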