SuperNN  0.7.0
data.cpp
1 /*
2  This file is part of SuperNN.
3 
4  SuperNN is free software: you can redistribute it and/or modify
5  it under the terms of the GNU Lesser General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  SuperNN is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU Lesser General Public License for more details.
13 
14  You should have received a copy of the GNU Lesser General Public License
15  along with SuperNN. If not, see <http://www.gnu.org/licenses/>.
16 
17  Copyright (C) 2010 - 2014 Lucas Hermann Negri
18 */
19 
20 #include <locale>
21 #include <algorithm>
22 #include "data.hpp"
23 #include "foreach.hpp"
24 
25 namespace SuperNN
26 {
28 {
29 }
30 
32 {
33 }
34 
35 SInfo::SInfo(double _min, double _max) : min(_min), max(_max)
36 {
37 }
38 
39 double SInfo::scale(const SInfo &to, double value) const
40 {
41  const double brange = max - min;
42  const double srange = to.max - to.min;
43 
44  if(brange < FSMALL || srange < FSMALL)
45  {
46  return (to.max + to.min) * 2; // mean
47  }
48  else
49  {
50  const double x = (value - min) / brange;
51  return x * srange + to.min;
52  }
53 }
54 
56 {
57 }
58 
60 {
61 }
62 
63 void Bounds::merge_with(const Bounds &other)
64 {
65  foreach(n, 0, size())
66  {
67  SInfo &i1 = at(n);
68  const SInfo &i2 = other.at(n);
69 
70  i1.min = std::min(i1.min, i2.min);
71  i1.max = std::max(i1.max, i2.max);
72  }
73 }
74 
75 void Bounds::save_file(std::ofstream &out) const
76 {
77  out << size() << std::endl;
78  out.precision(file_precision);
79 
80  foreach(b, 0, size())
81  out << at(b).min << "\t" << at(b).max << std::endl;
82 }
83 
84 void Bounds::load_file(std::ifstream &inp)
85 {
86  unsigned n;
87  inp >> n;
88 
89  resize(n);
90 
91  foreach(i, 0, n)
92  inp >> at(i).min >> at(i).max;
93 }
94 
/*
 * Opens the file at `path` and reads two consecutive Bounds sections from it:
 * first into `from`, then into `to`. The stream is switched to the "C" locale
 * before parsing and closed afterwards.
 *
 * NOTE(review): the statement guarded by `if(!inp.is_open())` is not visible
 * in this listing (the embedded numbering jumps from 100 to 102). Presumably
 * it raises the library's "file couldn't be opened" error - confirm against
 * the real source before editing this function.
 */
95 void Bounds::load_file(const std::string &path, Bounds &from, Bounds &to)
96 {
97  std::ifstream inp;
98  inp.open(path.c_str());
99 
100  if(!inp.is_open())
102 
103  inp.imbue(std::locale("C"));
104 
/* the two sections are stored back to back in the same stream */
105  from.load_file(inp);
106  to.load_file(inp);
107 
108  inp.close();
109 }
110 
112 {
113 }
114 
115 Data::Data(unsigned rows, unsigned cols) : std::vector<Row>(rows, Row(cols)), n_total(cols)
116 {
117  from.resize(cols);
118  to.resize(cols);
119 }
120 
122 {
123 }
124 
125 Data::Data(unsigned rows, unsigned cols, const double st[])
126  : std::vector<Row>(rows, Row(cols)), n_total(cols)
127 {
128  from.resize(cols);
129  to.resize(cols);
130 
131  foreach(r, 0, rows)
132  {
133  Row &a = at(r);
134  foreach(c, 0, cols)
135  a[c] = st[r * cols + c];
136  }
137 }
138 
139 Data::Data(unsigned rows, unsigned cols, const Row &row)
140  : std::vector<Row>(rows, Row(cols)), n_total(cols)
141 {
142  from.resize(cols);
143  to.resize(cols);
144 
145  foreach(r, 0, rows)
146  {
147  Row &a = at(r);
148  foreach(c, 0, cols)
149  a[c] = row[r * cols + c];
150  }
151 }
152 
/*
 * Returns a copy of the current object without column `col`: the bounds
 * entries and every row are copied, skipping the entries at index `col`.
 * The copy's n_total is one less than the current one.
 *
 * NOTE(review): the statement guarded by the range check below is not
 * visible in this listing (the embedded numbering jumps from 155 to 157);
 * presumably it reports an invalid-parameter error - confirm.
 * NOTE(review): `col` is unsigned, so the `col < 0` half of the check can
 * never be true.
 */
153 Data Data::drop_column(unsigned col) const
154 {
155  if(col < 0 || col >= n_total)
157 
158  Data new_data;
159  new_data.reserve(size());
160  new_data.n_total = n_total - 1;
161 
162  /* bounds */
163  foreach(i, 0, n_total)
164  {
165  if(i != col)
166  {
167  new_data.from.push_back(from[i]);
168  new_data.to.push_back(to[i]);
169  }
170  }
171 
172  /* content */
173  foreach(i, 0, size())
174  {
175  Row r;
176  foreach(j, 0, n_total)
177  {
178  if(j != col)
179  r.push_back(at(i)[j]);
180  }
181  new_data.push_back(r);
182  }
183 
184  return new_data;
185 }
186 
/*
 * Body of the row-shuffling routine: reorders the rows in place.
 *
 * NOTE(review): the signature line is absent from this listing (the embedded
 * numbering skips 187).
 * NOTE(review): random_shuffle is called unqualified, so it is presumably
 * found through a using-declaration or argument-dependent lookup - confirm.
 * std::random_shuffle was removed in C++17.
 */
188 {
189  random_shuffle(begin(), end());
190 }
191 
/*
 * Returns a new Data holding copies of the rows in [first, last), together
 * with copies of the current `from` and `to` bounds.
 *
 * NOTE(review): the statement guarded by `if(n_rows < 0)` is not visible in
 * this listing (the embedded numbering jumps from 196 to 198); presumably it
 * reports an invalid-parameter error - confirm.
 */
192 Data Data::sample(unsigned first, unsigned last) const
193 {
/* unsigned difference converted to int, so last < first shows up as negative */
194  const int n_rows = last - first;
195 
196  if(n_rows < 0)
198 
199  Data new_data(n_rows, n_total);
200 
/* at() is range-checked, so a `last` beyond the stored rows throws */
201  for(int i = 0; i < n_rows; ++i)
202  new_data[i] = at(first + i);
203 
204  new_data.from = from;
205  new_data.to = to;
206 
207  return new_data;
208 }
209 
/*
 * Body of the row-appending routine: pushes a new Row holding n_total
 * value-initialised entries and returns a reference to it.
 *
 * NOTE(review): the signature line is absent from this listing (the embedded
 * numbering skips 210).
 */
211 {
212  push_back(Row(n_total));
213  return back();
214 }
215 
/*
 * Reads values from the file at `path` and appends them to the data.
 *
 * The file starts with a header "n_rows n_total", followed by
 * n_rows * n_total values. n_total and the sizes of `from` / `to` are
 * overwritten from the header. Parsing uses the "C" locale.
 *
 * Returns the row count announced by the header.
 *
 * NOTE(review): two statements are not visible in this listing (the embedded
 * numbering skips 222 and 235): the one guarded by `if(!inp.is_open())` and
 * the one following `inp.close()` in the header check. Presumably both raise
 * library errors - confirm against the real source.
 */
216 unsigned Data::load_file(const std::string &path)
217 {
218  std::ifstream inp;
219  inp.open(path.c_str());
220 
221  if(!inp.is_open())
223 
224  inp.imbue(std::locale("C"));
225 
226  unsigned n_rows;
227  inp >> n_rows >> n_total;
228 
/* bounds are resized before the header values are validated below */
229  from.resize(n_total);
230  to.resize(n_total);
231 
232  if(n_rows < 1 || n_total < 2)
233  {
234  inp.close();
236  }
237 
/* rows are appended after any rows already stored */
238  reserve(size() + n_rows);
239 
240  foreach(r, 0, n_rows)
241  {
242  Row row(n_total);
243 
244  foreach(i, 0, n_total)
245  inp >> row[i];
246 
247  push_back(row);
248  }
249 
250  inp.close();
251  return n_rows;
252 }
253 
/*
 * Writes the data values to the file at `path`.
 *
 * Layout: a header line "rows n_total", then one line per row with its
 * values separated (and followed) by a single space. Values are written in
 * the "C" locale with file_precision digits.
 *
 * NOTE(review): the statement guarded by `if(!out.is_open())` is not visible
 * in this listing (the embedded numbering jumps from 259 to 261); presumably
 * it raises the library's "file couldn't be opened" error - confirm.
 */
254 void Data::save_file(const std::string &path) const
255 {
256  std::ofstream out;
257  out.open(path.c_str());
258 
259  if(!out.is_open())
261 
262  out.imbue(std::locale("C"));
263 
264  out << size() << " " << n_total << std::endl;
265  out.precision(file_precision);
266 
267  foreach(r, 0, size())
268  {
269  foreach(i, 0, n_total)
270  out << at(r)[i] << " ";
271  out << std::endl;
272  }
273 
274  out.close();
275 }
276 
/*
 * Writes the bounds information to the file at `path`: the `from` section
 * first, then the `to` section, each through Bounds::save_file. Output uses
 * the "C" locale and file_precision digits.
 *
 * NOTE(review): the statement guarded by `if(!out.is_open())` is not visible
 * in this listing (the embedded numbering jumps from 282 to 284); presumably
 * it raises the library's "file couldn't be opened" error - confirm.
 */
277 void Data::save_info_file(const std::string &path) const
278 {
279  std::ofstream out;
280  out.open(path.c_str());
281 
282  if(!out.is_open())
284 
285  out.imbue(std::locale("C"));
286  out.precision(file_precision);
287 
288  from.save_file(out);
289  to.save_file(out);
290 
291  out.close();
292 }
293 
/*
 * Body of the bounds calculation: for each of the n_total columns, scans all
 * rows for the smallest and largest value, stores the pair in `from`, and
 * finally returns `from`.
 *
 * NOTE(review): the signature line is absent from this listing (the embedded
 * numbering skips 294).
 */
295 {
296  from.resize(n_total);
297 
298  foreach(n, 0, n_total)
299  {
/* seeded from row 0; at() is range-checked, so this throws when there are no rows */
300  double min = at(0)[n], max = at(0)[n];
301 
302  foreach(p, 1, size())
303  {
304  double v = at(p)[n];
305 
306  if(v < min)
307  min = v;
308  else if(v > max)
309  max = v;
310  }
311 
312  from[n] = SInfo(min, max);
313  }
314 
315  return from;
316 }
317 
318 void Data::scale_column(unsigned n, const SInfo &curv, const SInfo &newv)
319 {
320  foreach(p, 0, size())
321  at(p)[n] = curv.scale(newv, at(p)[n]);
322 }
323 
/*
 * Body of the parameterless scaling routine: rescales every column from its
 * `from` bounds to its `to` bounds.
 *
 * NOTE(review): the signature line is absent from this listing (the embedded
 * numbering skips 324).
 */
325 {
326  foreach(c, 0, n_total)
327  scale_column(c, from[c], to[c]);
328 }
329 
330 void Data::scale(double min, double max)
331 {
332  foreach(c, 0, n_total)
333  {
334  to[c] = SInfo(min, max);
335  scale_column(c, from[c], to[c]);
336  }
337 }
338 
/*
 * Body of the descaling routine: the inverse of the parameterless scale,
 * mapping every column from its `to` bounds back to its `from` bounds.
 *
 * NOTE(review): the signature line is absent from this listing (the embedded
 * numbering skips 339).
 */
340 {
341  foreach(c, 0, n_total)
342  scale_column(c, to[c], from[c]);
343 }
344 
345 void Data::k_fold(unsigned n, unsigned k, Data &p, Data &l) const
346 {
347  foreach(r, 0, size())
348  {
349  if(r % k == n)
350  p.push_back(at(r));
351  else
352  l.push_back(at(r));
353  }
354 }
355 
356 }
void k_fold(unsigned n, unsigned k, Data &p, Data &l) const
Fills two Data objects with complementary information, useful for cross-validation.
Definition: data.cpp:345
void scale_column(unsigned n, const SInfo &curv, const SInfo &newv)
Scales a single data column.
Definition: data.cpp:318
Bounds from
Original data bounds.
Definition: data.hpp:261
thrown when a file couldn't be opened
Definition: utils.hpp:35
Bounds & calc_bounds()
Calculates and sets the data bounds using the minimum and maximum values of each neuron.
Definition: data.cpp:294
Data sample(unsigned first, unsigned last) const
Samples the data.
Definition: data.cpp:192
void merge_with(const Bounds &other)
Merges the values from another Bounds with the current, retaining the limits.
Definition: data.cpp:63
void load_file(std::ifstream &inp)
Reads the scaling information from a file stream.
Definition: data.cpp:84
const unsigned file_precision
Precision used when writing floating-point numbers to files.
Definition: utils.hpp:29
STL namespace.
virtual ~Bounds()
Definition: data.cpp:59
Minimum / maximum scaling information.
Definition: data.hpp:33
void save_file(const std::string &path) const
Erases the contents of a file and saves the Data values into it.
Definition: data.cpp:254
void scale()
Scales the data, neuron per neuron, using the current from and to bounds.
Definition: data.cpp:324
size_t n_total
Number of inputs + outputs per row.
Definition: data.hpp:267
thrown when calling a function with invalid parameters
Definition: utils.hpp:41
Bounds to
Scaled data bounds.
Definition: data.hpp:264
Data scaling information, for all input and output neurons.
Definition: data.hpp:47
double scale(const SInfo &to, double value) const
Definition: data.cpp:39
void save_file(std::ofstream &out) const
Appends the data bounds values into a file stream.
Definition: data.cpp:75
The exception can be identified by the type() method.
Definition: utils.hpp:59
double min
Definition: data.hpp:41
double max
Definition: data.hpp:41
void save_info_file(const std::string &path) const
Erases the contents of a file and saves the Data bounds info into it.
Definition: data.cpp:277
std::vector< double > Row
Data row.
Definition: data.hpp:90
Data used in training, validation and testing.
Definition: data.hpp:95
thrown when a file has invalid contents
Definition: utils.hpp:38
void descale()
Descales the data.
Definition: data.cpp:339
unsigned load_file(const std::string &path)
Reads values from a file and appends them to the data.
Definition: data.cpp:216
void shuffle()
Randomizes the positions of the rows.
Definition: data.cpp:187
Data drop_column(unsigned col) const
Returns a copy of the current object, without a column.
Definition: data.cpp:153
Row & add()
Adds a row to the data, returning a reference to it.
Definition: data.cpp:210
virtual ~Data()
Definition: data.cpp:121