SuperNN  1.0.0
training.cpp
/*
 This file is part of SuperNN.

 SuperNN is free software: you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 SuperNN is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public License
 along with SuperNN. If not, see <http://www.gnu.org/licenses/>.

 Copyright (C) 2010 - 2015 Lucas Hermann Negri
*/

#include <vector>
#include <Eigen/Dense>

#include "activation.hpp"
#include "neuron.hpp"
#include "utils.hpp"
#include "data.hpp"
#include "network.hpp"
#include "training.hpp"

namespace SuperNN
{

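/* Computes and caches (via the delta_ok flag) the local error gradient of a
   neuron by standard backpropagation:

       output layer:  delta = err * g'(net)
       hidden layer:  delta = g'(net) * sum_k(delta_k * w_k)

   where the sum runs over the outgoing connections and each delta_k comes
   from a recursive call on the connection's target neuron. */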
double TrainingAlgorithm::delta(Network &net, unsigned l, unsigned n)
{
    std::vector<Layer> &layers = net.layers;
    Neuron &neuron = layers[l][n];

    if(!neuron.delta_ok)
    {
        neuron.delta_ok = true;

        if(l == layers.size() - 1)
        {
            const double gd = ActFunc::derivative(neuron);
            neuron.delta = neuron.err * gd;
        }
        else
        {
            for(unsigned c = 0, e = neuron.size(); c < e; ++c)
            {
                const Connection &conn = neuron[c];
                const double d = delta(net, conn.to_layer, conn.to_neuron);
                const double gd = ActFunc::derivative(neuron);
                neuron.delta += d * conn.weight * gd;
            }
        }
    }

    return neuron.delta;
}

TrainingAlgorithm::TrainingAlgorithm()
{
}

void TrainingAlgorithm::prepare(Network &net)
{
}

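/* Sanity check shared by the training algorithms: the data must not be empty
   and each row must hold exactly num_inputs + num_outputs values, otherwise
   one of the utils.hpp exceptions is thrown. */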
void TrainingAlgorithm::check(const Network& net, const Data& data) const
{
    if(data.empty())
        throw ParamError(); /* exception name assumed; the listing elided this
                               line, which throws the utils.hpp exception for
                               calling a function with invalid parameters */

    const unsigned total_size = net.calc_num_inputs() + net.layers.back().size();

    if(total_size != data[0].size())
        throw SizeError(); /* exception name assumed; the original throws the
                              utils.hpp exception for a Row whose dimensions
                              do not match the network */
}

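/* Accumulates the partial derivative of the error with respect to each
   connection weight: derror += -delta(target) * out(source). The deltas must
   have already been computed by delta() for the current pattern. */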
void TrainingAlgorithm::derror_acc(Network &net)
{
    std::vector<Layer> &layers = net.layers;

    for(unsigned l = 0, e = layers.size(); l < e; ++l)
    {
        for(unsigned n = 0, e = layers[l].size(); n < e; ++n)
        {
            Neuron &neuron = layers[l][n];

            for(unsigned c = 0, e = neuron.size(); c < e; ++c)
            {
                Connection &conn = neuron[c];
                Neuron &to = layers[conn.to_layer][conn.to_neuron];
                conn.derror += -to.delta * neuron.out;
            }
        }
    }
}

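/* Clears the accumulated error partial derivative of every connection. */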
void TrainingAlgorithm::clear_derror_acc(Network &net)
{
    for(unsigned l = 0, e = net.layers.size(); l < e; ++l)
    {
        for(unsigned n = 0, e = net.layers[l].size(); n < e; ++n)
        {
            Neuron &neuron = net.layers[l][n];

            for(unsigned c = 0, e = neuron.size(); c < e; ++c)
                neuron[c].derror = 0;
        }
    }
}

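/* Base implementation of backpropagation with an adaptive learning rate:
   eta starts at 0.7 and is increased by eta_if after an epoch that lowered
   the MSE or decreased by eta_df otherwise, within [eta_min, eta_max]. */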
ImplBackprop::ImplBackprop() : eta_df(0.95), eta_if(1.03), eta_min(1e-5), eta_max(1e5), eta(0.7)
{
}

ImplBackprop::~ImplBackprop()
{
}

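/* Steepest-descent step over the accumulated derivatives:
   w += -(eta * factor) * dE/dw. The factor lets batch training scale the
   update accumulated over a whole epoch. */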
void ImplBackprop::update_weights(Network &net, double factor)
{
    std::vector<Layer> &layers = net.layers;
    const double eta_f = eta * factor;

    for(unsigned l = 0, e = layers.size(); l < e; ++l)
    {
        for(unsigned n = 0, e = layers[l].size(); n < e; ++n)
        {
            Neuron &neuron = layers[l][n];

            for(unsigned c = 0, e = neuron.size(); c < e; ++c)
            {
                Connection &conn = neuron[c];
                conn.weight += -eta_f * conn.derror;
            }
        }
    }
}

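/* Adapts the learning rate from the MSE trend: shrinks it when the error did
   not improve, grows it otherwise, clamped to [eta_min, eta_max]. */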
void ImplBackprop::update_eta(double mse, double last_mse)
{
    if(mse >= last_mse)
    {
        eta *= eta_df;

        if(eta < eta_min)
            eta = eta_min;
    }
    else
    {
        eta *= eta_if;

        if(eta > eta_max)
            eta = eta_max;
    }
}

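/* Incremental (online) backpropagation: the weights are updated after every
   presented pattern. */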
Incremental::Incremental()
{
}

Incremental::~Incremental()
{
}

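/* Runs up to max_epochs epochs, stopping early once the MSE reaches dmse,
   and returns the number of epochs used. Typical call sequence (the network
   and data setup is defined elsewhere):

       Incremental algo;
       algo.prepare(net);
       unsigned epochs = algo.train(net, data, 0.01, 1000);
*/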
unsigned Incremental::train(Network &net, const Data &data, double dmse, unsigned max_epochs)
{
    check(net, data);
    std::vector<Layer> &layers = net.layers;

    double last_mse = -1;
    unsigned e;

    for(e = 1; e <= max_epochs; ++e)
    {
        for(unsigned p = 0, e = data.size(); p < e; ++p)
        {
            clear_derror_acc(net);
            net.run(data[p], true);

            for(unsigned n = 0, e = layers[1].size(); n < e; ++n)
                delta(net, 1, n);

            derror_acc(net);
            update_weights(net);
        }

        const double mse = net.calc_mse(data);
        update_eta(mse, last_mse);

        if(mse <= dmse) break;

        last_mse = mse;
    }

    return e;
}

Batch::Batch()
{
}

Batch::~Batch()
{
}

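/* Batch backpropagation: the error derivatives are accumulated over all
   patterns of an epoch and the weights are updated once per epoch, passing
   the data size as the update scale factor. */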
unsigned Batch::train(Network &net, const Data &data, double dmse, unsigned max_epochs)
{
    check(net, data);
    std::vector<Layer> &layers = net.layers;

    double last_mse = -1, isize = data.size();
    unsigned e;

    for(e = 1; e <= max_epochs; ++e)
    {
        clear_derror_acc(net);

        for(unsigned p = 0, e = data.size(); p < e; ++p)
        {
            net.run(data[p], true);

            for(unsigned n = 0, e = layers[1].size(); n < e; ++n)
                delta(net, 1, n);

            derror_acc(net);
        }

        update_weights(net, isize);

        const double mse = net.calc_mse(data);
        update_eta(mse, last_mse);

        if(mse <= dmse) break;

        last_mse = mse;
    }

    return e;
}

/* IRprop algorithm */

IRprop::IRprop() : delta_df(0.5), delta_if(1.2), delta_min(0), delta_max(50), delta_zero(0.1)
{
}

IRprop::~IRprop()
{
}

void IRprop::prepare(Network &net)
{
    std::vector<Layer> &layers = net.layers;

    for(unsigned l = 0, e = layers.size(); l < e; ++l)
    {
        for(unsigned n = 0, e = layers[l].size(); n < e; ++n)
        {
            std::vector<Connection> &conns = layers[l][n].conns;

            for(unsigned c = 0, e = conns.size(); c < e; ++c)
            {
                conns[c].aux1 = 0;          // last local error gradient
                conns[c].aux2 = delta_zero; // last weight change
            }
        }
    }
}

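/* iRPROP weight update, driven only by the sign of the accumulated
   derivative: when the derivative kept its sign since the last update, the
   step size (aux2) grows by delta_if; when the sign flipped, the step size
   shrinks by delta_df and the update is skipped for this iteration (derror
   is zeroed). The step is clamped to [delta_min, delta_max] and applied
   against the gradient direction. */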
void IRprop::update_weights(Network &net)
{
    std::vector<Layer> &layers = net.layers;

    for(unsigned l = 0, e = layers.size(); l < e; ++l)
    {
        for(unsigned n = 0, e = layers[l].size(); n < e; ++n)
        {
            Neuron &neuron = layers[l][n];

            for(unsigned c = 0, e = neuron.size(); c < e; ++c)
            {
                Connection &conn = neuron[c];
                const double sign = conn.aux1 * conn.derror;

                if(sign > 0)
                {
                    conn.aux2 *= delta_if;
                }
                else if(sign < 0)
                {
                    conn.aux2 *= delta_df;
                    conn.derror = 0;
                }

                conn.aux2 = Utils::limit(delta_min, delta_max, conn.aux2);

                if(conn.derror > 0)
                    conn.weight -= conn.aux2;
                else if(conn.derror < 0)
                    conn.weight += conn.aux2;

                conn.aux1 = conn.derror;
            }
        }
    }
}

unsigned IRprop::train(Network &net, const Data &data, double dmse, unsigned max_epochs)
{
    check(net, data);
    std::vector<Layer> &layers = net.layers;
    unsigned e;

    for(e = 1; e <= max_epochs; ++e)
    {
        clear_derror_acc(net);

        for(unsigned p = 0, e = data.size(); p < e; ++p)
        {
            net.run(data[p], true);

            for(unsigned n = 0, e = layers[1].size(); n < e; ++n)
                delta(net, 1, n);

            derror_acc(net);
        }

        update_weights(net);

        if(dmse > 0 && net.calc_mse(data) <= dmse)
            break;
    }

    return e;
}

/* IRprop L1 minimization algorithm */

IRpropL1::IRpropL1()
{
}

IRpropL1::~IRpropL1()
{
}

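/* Same recursion as TrainingAlgorithm::delta(), but minimizing the L1
   (absolute) error: at the output layer the derivative of |err| is sign(err),
   so the delta becomes +g'(net), -g'(net) or 0 instead of err * g'(net). */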
double IRpropL1::delta(Network &net, unsigned l, unsigned n)
{
    std::vector<Layer> &layers = net.layers;

    Neuron &neuron = layers[l][n];

    if(!neuron.delta_ok)
    {
        neuron.delta_ok = true;

        if(l == layers.size() - 1)
        {
            const double gd = ActFunc::derivative(neuron);
            neuron.delta = neuron.err > 0 ? gd : (neuron.err < 0 ? -gd : 0);
        }
        else
        {
            for(unsigned c = 0, e = neuron.size(); c < e; ++c)
            {
                Connection &conn = neuron[c];
                const double d = delta(net, conn.to_layer, conn.to_neuron);
                const double gd = ActFunc::derivative(neuron);
                neuron.delta += d * conn.weight * gd;
            }
        }
    }

    return neuron.delta;
}

/* NBN algorithm */

/* private members of NBN algorithm */
struct NBN::Hist
{
    /* Quasi-Hessian matrix */
    Eigen::MatrixXd hessian;

    /* Transposed Jacobian line */
    Eigen::VectorXd t_jacob_line;

    /* Error gradient */
    Eigen::VectorXd gradient;

    /* Weights (same as the network weights, but in a format
       suitable to the algorithm) */
    Eigen::VectorXd weights;

    /* Backup of the weights, for backtracking */
    Eigen::VectorXd weights_backup;
};

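/* NBN (Neuron by Neuron) is a Levenberg-Marquardt style second-order
   trainer: it accumulates a quasi-Hessian matrix and an error gradient from
   one Jacobian line per output neuron per pattern, then solves a damped
   linear system for the weight update, adapting the damping term mu by
   backtracking (see update_mu()). */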
NBN::NBN() : h(new NBN::Hist()), n_weights(0), mu_zero(0.1), mu(mu_zero), beta(2.0), mu_min(1e-50), mu_max(1e50), local_iters(5)
{
}

NBN::~NBN()
{
    delete h;
}

void NBN::prepare(Network &net)
{
    n_weights = net.calc_num_weights();

    h->hessian = Eigen::MatrixXd(n_weights, n_weights);
    h->gradient = Eigen::VectorXd(n_weights);
    h->weights = Eigen::VectorXd(n_weights);
    h->t_jacob_line = Eigen::VectorXd(n_weights);
    mu = mu_zero;
}

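/* get_weights()/set_weights() copy the connection weights between the
   network and the flat vector used by the solver, always visiting the
   connections in the same (layer, neuron, connection) order. */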
void NBN::get_weights(const Network &net)
{
    unsigned i = 0;

    for(unsigned l = 0, e = net.layers.size(); l < e; ++l)
    {
        const Layer &layer = net.layers[l];

        for(unsigned n = 0, e = layer.size(); n < e; ++n)
        {
            for(unsigned c = 0, e = layer[n].size(); c < e; ++c)
            {
                h->weights(i) = layer[n][c].weight;
                ++i;
            }
        }
    }
}

void NBN::set_weights(Network &net) const
{
    unsigned i = 0;

    for(unsigned l = 0, e = net.layers.size(); l < e; ++l)
    {
        Layer &layer = net.layers[l];

        for(unsigned n = 0, e = layer.size(); n < e; ++n)
        {
            for(unsigned c = 0, e = layer[n].size(); c < e; ++c)
            {
                layer[n][c].weight = h->weights(i);
                ++i;
            }
        }
    }
}

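/* Local gradient of a neuron with respect to output neuron m only, without
   the error term, used to build one transposed Jacobian line at a time. */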
double NBN::delta(Network &net, unsigned l, unsigned n, unsigned m)
{
    std::vector<Layer> &layers = net.layers;
    Neuron &neuron = layers[l][n];

    if(!neuron.delta_ok)
    {
        neuron.delta_ok = true;

        if(l == layers.size() - 1)
        {
            if(n == m)
                neuron.delta = ActFunc::derivative(neuron);
        }
        else
        {
            for(unsigned c = 0, e = neuron.size(); c < e; ++c)
            {
                const Connection &conn = neuron[c];
                const double d = delta(net, conn.to_layer, conn.to_neuron, m);
                const double gd = ActFunc::derivative(neuron);
                neuron.delta += d * conn.weight * gd;
            }
        }
    }

    return neuron.delta;
}

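/* Builds the transposed Jacobian line for output neuron m of the current
   pattern, i.e. the derivative of output m with respect to every weight,
   by reusing the derror accumulation machinery with the m-specific deltas. */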
void NBN::calc_jacobian_transp_line(Network &net, unsigned m)
{
    std::vector<Layer> &layers = net.layers;

    net.clear_neurons(true, false);
    clear_derror_acc(net);

    for(unsigned n = 0, e = layers[1].size(); n < e; ++n)
        delta(net, 1, n, m);

    derror_acc(net);

    unsigned i = 0;

    for(unsigned l = 0, e = layers.size(); l < e; ++l)
    {
        const Layer &layer = layers[l];

        for(unsigned n = 0, e = layer.size(); n < e; ++n)
        {
            for(unsigned c = 0, e = layer[n].size(); c < e; ++c)
            {
                h->t_jacob_line(i) = -layer[n][c].derror;
                ++i;
            }
        }
    }
}

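/* Rank-one accumulation, one Jacobian line j at a time:

       H += j * j^T   (quasi-Hessian)
       g += j * err   (error gradient)
*/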
void NBN::update_hessian_gradient(double err)
{
    h->hessian.noalias() += h->t_jacob_line * h->t_jacob_line.transpose();
    h->gradient.noalias() += h->t_jacob_line * err;
}

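/* Damped Gauss-Newton step: solves (H + mu*I) * step = g with a column
   pivoting Householder QR decomposition and adds the step to the weights. */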
void NBN::update_weights(Network &net)
{
    const unsigned n = n_weights;
    h->weights.noalias() +=
        (h->hessian + Eigen::MatrixXd::Identity(n, n) * mu).colPivHouseholderQr().solve(
            h->gradient);
    set_weights(net);
}

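/* Damping control with backtracking: when the MSE did not improve, the
   previous weights are restored and mu is increased by beta; otherwise mu
   is decreased and the new weights become the backup. Returns true when
   the step was accepted. */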
bool NBN::update_mu(double mse, double last_mse)
{
    bool res = false;

    if(mse >= last_mse)
    {
        h->weights = h->weights_backup;
        mu *= beta;
        res = false;
    }
    else
    {
        mu /= beta;
        h->weights_backup = h->weights;
        res = true;
    }

    mu = Utils::limit(mu_min, mu_max, mu);
    return res;
}

unsigned NBN::train(Network &net, const Data &data, double dmse, unsigned max_epochs)
{
    check(net, data);
    Layer &last = net.layers.back();
    double last_mse = net.calc_mse(data);
    unsigned e;

    get_weights(net);
    h->weights_backup = h->weights;

    for(e = 1; e <= max_epochs; ++e)
    {
        h->hessian.setZero();
        h->gradient.setZero();

        for(unsigned p = 0, e = data.size(); p < e; ++p)
        {
            net.run(data[p], true);

            for(unsigned m = 0, e = last.size(); m < e; ++m)
            {
                calc_jacobian_transp_line(net, m);
                update_hessian_gradient(last[m].err);
            }
        }

        double mse = -1;

        /* local iterations */
        for(unsigned tt = 0; tt < local_iters; ++tt)
        {
            update_weights(net);
            mse = net.calc_mse(data);

            if(update_mu(mse, last_mse))
                break;
        }

        if(mse <= dmse)
            break;

        last_mse = mse;
    }

    return e;
}

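/* Estimates the generalization error by k-fold cross validation: trains a
   fresh copy of the network on each training fold and averages the MSE
   obtained on the k validation folds. */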
double k_fold_error(TrainingAlgorithm& algo, const Network &net, const Data &data, unsigned k,
                    double dmse, unsigned max_epochs)
{
    double acc = 0;

    for(unsigned n = 0; n < k; ++n)
    {
        Network cur_net = net;

        Data validation, training;
        data.k_fold(n, k, validation, training);

        algo.prepare(cur_net);
        algo.train(cur_net, training, dmse, max_epochs);

        acc += cur_net.calc_mse(validation);
    }

    return acc / k;
}

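/* Trains in blocks of step_size epochs, keeping the weights that performed
   best on the validation set, and stops after max_stuck epochs without
   improvement (or max_epochs in total). Typical usage (the data split into
   training and validation sets is done elsewhere):

       IRprop algo;
       unsigned best_it = early_stopping(algo, net, training, validation,
                                         5, 50, 2000);
*/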
unsigned early_stopping(TrainingAlgorithm &algo, Network &net, const Data& training,
                        const Data& validation, unsigned step_size, unsigned max_stuck,
                        unsigned max_epochs)
{
    algo.prepare(net);
    Network best_net = net;
    double best_mse = net.calc_mse(validation);
    unsigned best_it = 0;

    unsigned stuck = 0, it = 0;

    while( (stuck < max_stuck) && (it < max_epochs) )
    {
        algo.train(net, training, -1, step_size);
        it += step_size;

        const double new_mse = net.calc_mse(validation);
        if(new_mse < best_mse)
        {
            best_net = net;
            best_mse = new_mse;
            best_it = it;
            stuck = 0;
        }
        else
        {
            stuck += step_size;
        }
    }

    net = best_net;
    return best_it;
}

}