SuperNN  0.7.0
training.cpp
/*
 This file is part of SuperNN.

 SuperNN is free software: you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 SuperNN is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public License
 along with SuperNN. If not, see <http://www.gnu.org/licenses/>.

 Copyright (C) 2010 - 2014 Lucas Hermann Negri
*/
#include <vector>
#include <Eigen/Dense>

#include "activation.hpp"
#include "neuron.hpp"
#include "utils.hpp"
#include "data.hpp"
#include "network.hpp"
#include "training.hpp"
#include "foreach.hpp"

namespace SuperNN
{

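/*
 Local error gradient (delta) of neuron n in layer l, computed by standard
 backpropagation: for an output neuron, delta = err * g'(net); for any other
 neuron, the deltas of the downstream neurons are pulled recursively through
 the outgoing connections. delta_ok memoizes the result within an iteration.
*/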
double TrainingAlgorithm::delta(Network &net, unsigned l, unsigned n)
{
    std::vector<Layer> &layers = net.layers;
    Neuron &neuron = layers[l][n];

    if(!neuron.delta_ok)
    {
        neuron.delta_ok = true;

        if(l == layers.size() - 1)
        {
            const double gd = ActFunc::derivative(neuron);
            neuron.delta = neuron.err * gd;
        }
        else
        {
            foreach(c, 0, neuron.size())
            {
                const Connection &conn = neuron[c];
                const double d = delta(net, conn.to_layer, conn.to_neuron);
                const double gd = ActFunc::derivative(neuron);
                neuron.delta += d * conn.weight * gd;
            }
        }
    }

    return neuron.delta;
}

TrainingAlgorithm::TrainingAlgorithm()
{
}

void TrainingAlgorithm::prepare(Network &)
{
}
71 
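/*
 Sanity check run before training: the rows of the training data must hold
 exactly one value per network input plus one value per output neuron.
*/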
void TrainingAlgorithm::check(const Network& net, const Data& data) const
{
    if(data.empty())
        throw ParamError(); // assumed name for the "invalid parameters" exception (utils.hpp)

    const unsigned total_size = net.calc_num_inputs() + net.layers.back().size();

    if(total_size != data[0].size())
        throw SizeError(); // assumed name for the "dimension mismatch" exception (utils.hpp)
}
82 
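/*
 Accumulates the partial derivative of the error with respect to each weight:
 dE/dw = -delta(target) * out(source), summed into conn.derror so that both
 the incremental and the batch algorithms can share the same code path.
*/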
void TrainingAlgorithm::derror_acc(Network &net)
{
    std::vector<Layer> &layers = net.layers;

    foreach(l, 0, layers.size())
    {
        foreach(n, 0, layers[l].size())
        {
            Neuron &neuron = layers[l][n];
            foreach(c, 0, neuron.size())
            {
                Connection &conn = neuron[c];
                Neuron &to = layers[conn.to_layer][conn.to_neuron];
                conn.derror += -to.delta * neuron.out;
            }
        }
    }
}
101 
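/* Resets the accumulated error derivative of every connection to zero. */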
void TrainingAlgorithm::clear_derror_acc(Network &net)
{
    foreach(l, 0, net.layers.size())
    {
        foreach(n, 0, net.layers[l].size())
        {
            Neuron &neuron = net.layers[l][n];

            foreach(c, 0, neuron.size())
                neuron[c].derror = 0;
        }
    }
}
115 
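/*
 Backpropagation with an adaptive learning rate: eta starts at 0.7 and is
 multiplied by eta_if (> 1) after an epoch that lowered the MSE, or by
 eta_df (< 1) otherwise, clamped to [eta_min, eta_max].
*/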
ImplBackprop::ImplBackprop() : eta_df(0.95), eta_if(1.03), eta_min(1e-5), eta_max(1e5), eta(0.7)
{
}

ImplBackprop::~ImplBackprop()
{
}
123 
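/*
 Plain gradient-descent step: w += -eta * dE/dw. The factor parameter lets
 the batch variant average the derivatives accumulated over an epoch.
*/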
void ImplBackprop::update_weights(Network &net, double factor)
{
    std::vector<Layer> &layers = net.layers;
    const double eta_f = eta * factor;

    foreach(l, 0, layers.size())
    {
        foreach(n, 0, layers[l].size())
        {
            Neuron &neuron = layers[l][n];

            foreach(c, 0, neuron.size())
            {
                Connection &conn = neuron[c];
                conn.weight += -eta_f * conn.derror;
            }
        }
    }
}
143 
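/*
 "Bold driver" style learning-rate adaptation: shrink eta when the error did
 not improve, grow it (more cautiously) when it did.
*/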
void ImplBackprop::update_eta(double mse, double last_mse)
{
    if(mse >= last_mse)
    {
        eta *= eta_df;

        if(eta < eta_min)
            eta = eta_min;
    }
    else
    {
        eta *= eta_if;

        if(eta > eta_max)
            eta = eta_max;
    }
}
161 
Incremental::Incremental()
{
}

Incremental::~Incremental()
{
}
169 
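/*
 Incremental (online) training: the deltas are computed and the weights are
 updated after every single pattern, so an epoch performs data.size() small
 gradient steps instead of one accumulated step.
*/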
unsigned Incremental::train(Network &net, const Data &data, double dmse, unsigned max_epochs)
{
    check(net, data);
    std::vector<Layer> &layers = net.layers;

    double last_mse = -1;
    unsigned e;

    for(e = 1; e <= max_epochs; ++e)
    {
        foreach(p, 0, data.size())
        {
            clear_derror_acc(net);
            net.run(data[p], true);

            foreach(n, 0, layers[1].size())
                delta(net, 1, n);

            derror_acc(net);
            update_weights(net);
        }

        const double mse = net.calc_mse(data);
        update_eta(mse, last_mse);

        if(mse <= dmse) break;

        last_mse = mse;
    }

    return e;
}
202 
Batch::Batch()
{
}

Batch::~Batch()
{
}
210 
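/*
 Batch training: the error derivatives of the whole epoch are accumulated
 first and a single averaged weight update is applied at the end.
*/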
unsigned Batch::train(Network &net, const Data &data, double dmse, unsigned max_epochs)
{
    check(net, data);
    std::vector<Layer> &layers = net.layers;

    double last_mse = -1;
    const double isize = 1.0 / data.size(); // inverse of the data size, used to average the update
    unsigned e;

    for(e = 1; e <= max_epochs; ++e)
    {
        clear_derror_acc(net);

        foreach(p, 0, data.size())
        {
            net.run(data[p], true);

            foreach(n, 0, layers[1].size())
                delta(net, 1, n);

            derror_acc(net);
        }

        update_weights(net, isize);

        const double mse = net.calc_mse(data);
        update_eta(mse, last_mse);

        if(mse <= dmse) break;

        last_mse = mse;
    }

    return e;
}
245 
/* IRprop algorithm */

IRprop::IRprop() : delta_df(0.5), delta_if(1.2), delta_min(0), delta_max(50), delta_zero(0.1)
{
}

IRprop::~IRprop()
{
}
255 
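/*
 Seeds the per-connection state needed by iRprop: aux1 holds the previous
 error gradient and aux2 the current step size, initialized to delta_zero.
*/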
void IRprop::prepare(Network &net)
{
    std::vector<Layer> &layers = net.layers;

    foreach(l, 0, layers.size())
    {
        foreach(n, 0, layers[l].size())
        {
            std::vector<Connection> &conns = layers[l][n].conns;
            foreach(c, 0, conns.size())
            {
                conns[c].aux1 = 0;          // last local error gradient
                conns[c].aux2 = delta_zero; // last weight change
            }
        }
    }
}
273 
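/*
 iRprop- update rule: only the sign of the accumulated gradient matters.
 If the gradient kept its sign, the step size grows by delta_if; if the sign
 flipped, the step shrinks by delta_df and the gradient is zeroed so that no
 step is taken for that weight on this epoch.
*/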
void IRprop::update_weights(Network &net)
{
    std::vector<Layer> &layers = net.layers;

    foreach(l, 0, layers.size())
    {
        foreach(n, 0, layers[l].size())
        {
            Neuron &neuron = layers[l][n];
            foreach(c, 0, neuron.size())
            {
                Connection &conn = neuron[c];
                const double sign = conn.aux1 * conn.derror;

                if(sign > 0)
                {
                    conn.aux2 *= delta_if;
                }
                else if(sign < 0)
                {
                    conn.aux2 *= delta_df;
                    conn.derror = 0;
                }

                conn.aux2 = Utils::limit(delta_min, delta_max, conn.aux2);

                if(conn.derror > 0)
                    conn.weight -= conn.aux2;
                else if(conn.derror < 0)
                    conn.weight += conn.aux2;

                conn.aux1 = conn.derror;
            }
        }
    }
}
313 
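/*
 One iRprop epoch: accumulate the derivatives over all patterns, then apply
 the sign-based update. The MSE is only computed when a target MSE was set.
*/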
unsigned IRprop::train(Network &net, const Data &data, double dmse, unsigned max_epochs)
{
    check(net, data);
    std::vector<Layer> &layers = net.layers;
    unsigned e;

    for(e = 1; e <= max_epochs; ++e)
    {
        clear_derror_acc(net);

        foreach(p, 0, data.size())
        {
            net.run(data[p], true);

            foreach(n, 0, layers[1].size())
                delta(net, 1, n);

            derror_acc(net);
        }

        update_weights(net);

        if(dmse > 0 && net.calc_mse(data) <= dmse)
            break;
    }

    return e;
}
342 
/* IRprop L1 minimization algorithm */

IRpropL1::IRpropL1()
{
}

IRpropL1::~IRpropL1()
{
}
351 
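/*
 Same recursion as TrainingAlgorithm::delta(), but for L1 minimization the
 output-layer delta uses the sign of the error instead of the error itself,
 since d|e|/de = sign(e).
*/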
double IRpropL1::delta(Network &net, unsigned l, unsigned n)
{
    std::vector<Layer> &layers = net.layers;

    Neuron &neuron = layers[l][n];

    if(!neuron.delta_ok)
    {
        neuron.delta_ok = true;

        if(l == layers.size() - 1)
        {
            const double gd = ActFunc::derivative(neuron);
            neuron.delta = neuron.err > 0 ? gd : (neuron.err < 0 ? -gd : 0);
        }
        else
        {
            foreach(c, 0, neuron.size())
            {
                Connection &conn = neuron[c];
                const double d = delta(net, conn.to_layer, conn.to_neuron);
                const double gd = ActFunc::derivative(neuron);
                neuron.delta += d * conn.weight * gd;
            }
        }
    }

    return neuron.delta;
}
381 
/* NBN algorithm */

/* private members of NBN algorithm */
struct NBN::Params
{
    Params() : mu_zero(0.1), mu(mu_zero), beta(2.0), mu_min(1e-50), mu_max(1e50)
    {
    }

    /** Quasi-Hessian matrix */
    Eigen::MatrixXd hessian;

    /** Transposed Jacobian line */
    Eigen::VectorXd t_jacob_line;

    /** Error gradient */
    Eigen::VectorXd gradient;

    /** Weight vector (same as the network weights, but in a format suitable
        to the algorithm) */
    Eigen::VectorXd weights;

    /** Backup of the weights, for backtracking */
    Eigen::VectorXd weights_backup;

    /** Initial value for mu */
    double mu_zero;

    /** Current value of mu */
    double mu;

    /** mu multiply/divide factor */
    double beta;

    /** Minimum value of mu */
    double mu_min;

    /** Maximum value of mu */
    double mu_max;
};
422 
NBN::NBN() : p(new NBN::Params()), n_weights(0)
{
}

NBN::~NBN()
{
    delete p;
}
431 
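/*
 Allocates the Levenberg-Marquardt working structures to match the current
 number of network weights and resets mu to its initial value.
*/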
void NBN::prepare(Network &net)
{
    n_weights = net.calc_num_weights();

    p->hessian = Eigen::MatrixXd(n_weights, n_weights);
    p->gradient = Eigen::VectorXd(n_weights);
    p->weights = Eigen::VectorXd(n_weights);
    p->t_jacob_line = Eigen::VectorXd(n_weights);
    p->mu = p->mu_zero;
}
442 
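/*
 get_weights()/set_weights() copy the connection weights between the network
 and the flat Eigen vector used by the solver, always traversing layers,
 neurons and connections in the same order.
*/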
void NBN::get_weights(const Network &net)
{
    unsigned i = 0;

    foreach(l, 0, net.layers.size())
    {
        const Layer &layer = net.layers[l];

        foreach(n, 0, layer.size())
        {
            foreach(c, 0, layer[n].size())
            {
                p->weights(i) = layer[n][c].weight;
                ++i;
            }
        }
    }
}
461 
void NBN::set_weights(Network &net) const
{
    unsigned i = 0;

    foreach(l, 0, net.layers.size())
    {
        Layer &layer = net.layers[l];

        foreach(n, 0, layer.size())
        {
            foreach(c, 0, layer[n].size())
            {
                layer[n][c].weight = p->weights(i);
                ++i;
            }
        }
    }
}
480 
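/*
 Variant of delta() used to build one Jacobian row: the recursion is seeded
 with g'(net) only at output neuron m (the other output deltas stay zero),
 so the resulting deltas differentiate that single output.
*/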
double NBN::delta(Network &net, unsigned l, unsigned n, unsigned m)
{
    std::vector<Layer> &layers = net.layers;
    Neuron &neuron = layers[l][n];

    if(!neuron.delta_ok)
    {
        neuron.delta_ok = true;

        if(l == layers.size() - 1)
        {
            if(n == m)
                neuron.delta = ActFunc::derivative(neuron);
        }
        else
        {
            foreach(c, 0, neuron.size())
            {
                const Connection &conn = neuron[c];
                const double d = delta(net, conn.to_layer, conn.to_neuron, m);
                const double gd = ActFunc::derivative(neuron);
                neuron.delta += d * conn.weight * gd;
            }
        }
    }

    return neuron.delta;
}
509 
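/*
 Computes one transposed-Jacobian line: the derivatives of output m with
 respect to every weight, stored in the same flat ordering used by
 get_weights().
*/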
void NBN::calc_jacobian_transp_line(Network &net, unsigned m)
{
    std::vector<Layer> &layers = net.layers;

    net.clear_neurons(true, false);
    clear_derror_acc(net);

    foreach(n, 0, layers[1].size())
        delta(net, 1, n, m);

    derror_acc(net);

    unsigned i = 0;

    foreach(l, 0, layers.size())
    {
        const Layer &layer = layers[l];

        foreach(n, 0, layer.size())
        {
            foreach(c, 0, layer[n].size())
            {
                p->t_jacob_line(i) = -layer[n][c].derror;
                ++i;
            }
        }
    }
}
538 
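/*
 Rank-one accumulation of the quasi-Hessian (J^T * J) and of the gradient
 (J^T * e), one Jacobian line at a time, so the full Jacobian never has to
 be stored.
*/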
void NBN::update_hessian_gradient(double err)
{
    p->hessian.noalias() += p->t_jacob_line * p->t_jacob_line.transpose();
    p->gradient.noalias() += p->t_jacob_line * err;
}
544 
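/*
 Levenberg-Marquardt step: solve (H + mu*I) * dw = gradient and add dw to
 the weight vector. A large mu approaches plain gradient descent, a small mu
 approaches the Gauss-Newton step.
*/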
void NBN::update_weights(Network &net)
{
    const unsigned n = n_weights;
    p->weights.noalias() +=
        (p->hessian + Eigen::MatrixXd::Identity(n, n) * p->mu).colPivHouseholderQr().solve(
            p->gradient);
    set_weights(net);
}
553 
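/*
 Accepts or rejects the last step: on failure (no MSE improvement) the
 weights are rolled back and mu is increased; on success mu is decreased and
 a new backup is taken. Returns true when the step was accepted.
*/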
bool NBN::update_mu(double mse, double last_mse)
{
    bool res = false;

    if(mse >= last_mse)
    {
        p->weights = p->weights_backup;
        p->mu *= p->beta;
        res = false;
    }
    else
    {
        p->mu /= p->beta;
        p->weights_backup = p->weights;
        res = true;
    }

    p->mu = Utils::limit(p->mu_min, p->mu_max, p->mu);
    return res;
}
574 
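/*
 One NBN epoch: rebuild the quasi-Hessian and the gradient from all
 patterns, then try up to 5 damped steps, increasing mu after each rejected
 one.
*/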
unsigned NBN::train(Network &net, const Data &data, double dmse, unsigned max_epochs)
{
    check(net, data);
    Layer &last = net.layers.back();
    double last_mse = net.calc_mse(data);
    unsigned e;

    get_weights(net);
    p->weights_backup = p->weights;

    for(e = 1; e <= max_epochs; ++e)
    {
        p->hessian.setZero();
        p->gradient.setZero();

        foreach(pat, 0, data.size())
        {
            net.run(data[pat], true);

            foreach(m, 0, last.size())
            {
                calc_jacobian_transp_line(net, m);
                update_hessian_gradient(last[m].err);
            }
        }

        double mse = -1;

        foreach(tt, 0, 5)
        {
            update_weights(net);
            mse = net.calc_mse(data);

            if(update_mu(mse, last_mse))
                break;
        }

        if(mse <= dmse)
            break;

        last_mse = mse;
    }

    return e;
}
620 
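/*
 k-fold cross-validation: trains k fresh copies of the network, each one
 holding out a different fold for validation, and returns the mean
 validation MSE as a performance estimate.
*/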
double k_fold_error(TrainingAlgorithm& algo, const Network &net, const Data &data, unsigned k,
                    double dmse, unsigned max_epochs)
{
    double acc = 0;

    foreach(n, 0, k)
    {
        Network cur_net = net;

        Data validation, training;
        data.k_fold(n, k, validation, training);

        algo.prepare(cur_net);
        algo.train(cur_net, training, dmse, max_epochs);

        acc += cur_net.calc_mse(validation);
    }

    return acc / k;
}
641 
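/*
 Early stopping: trains in chunks of step_size epochs and keeps the weights
 that scored the best validation MSE, giving up after max_stuck epochs
 without improvement. The best network found is copied back into net.
*/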
unsigned early_stopping(TrainingAlgorithm &algo, Network &net, const Data& training,
                        const Data& validation, int step_size, int max_stuck, int max_epochs)
{
    algo.prepare(net);
    Network best_net = net;
    double best_mse = net.calc_mse(validation);
    unsigned best_it = 0;

    unsigned stuck = 0, it = 0;

    while( (stuck < max_stuck) && (it < max_epochs) )
    {
        algo.train(net, training, -1, step_size);
        it += step_size;

        const double new_mse = net.calc_mse(validation);
        if(new_mse < best_mse)
        {
            best_net = net;
            best_mse = new_mse;
            best_it = it;
            stuck = 0;
        }
        else
        {
            stuck += step_size;
        }
    }

    net = best_net;
    return best_it;
}
673 
} // namespace SuperNN
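/*
 Illustrative usage sketch (an assumption, not part of this file): it
 presumes a Network and a Data set have already been built and loaded with
 facilities defined elsewhere in the library; only the types and functions
 seen above are used.

     SuperNN::Network net = ...;  // feedforward topology, built elsewhere
     SuperNN::Data data = ...;    // rows: input values followed by outputs

     SuperNN::IRprop algo;
     algo.prepare(net);                                    // seed aux1/aux2
     unsigned epochs = algo.train(net, data, 1e-3, 1000);  // until MSE <= 1e-3
     double mse = net.calc_mse(data);
*/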