SuperNN  1.0.0
data.cpp
1 /*
2  This file is part of SuperNN.
3 
4  SuperNN is free software: you can redistribute it and/or modify
5  it under the terms of the GNU Lesser General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  SuperNN is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU Lesser General Public License for more details.
13 
14  You should have received a copy of the GNU Lesser General Public License
15  along with SuperNN. If not, see <http://www.gnu.org/licenses/>.
16 
17  Copyright (C) 2010 - 2015 Lucas Hermann Negri
18 */
19 
20 #include <locale>
21 #include <algorithm>
22 #include "data.hpp"
23 
24 namespace SuperNN
25 {
27 {
28 }
29 
31 {
32 }
33 
34 SInfo::SInfo(double _min, double _max) : min(_min), max(_max)
35 {
36 }
37 
38 double SInfo::scale(const SInfo &to, double value) const
39 {
40  const double brange = max - min;
41  const double srange = to.max - to.min;
42 
43  if(brange < FSMALL || srange < FSMALL)
44  {
45  return (to.max + to.min) * 2; // mean
46  }
47  else
48  {
49  const double x = (value - min) / brange;
50  return x * srange + to.min;
51  }
52 }
53 
55 {
56 }
57 
59 {
60 }
61 
62 void Bounds::merge_with(const Bounds &other)
63 {
64  for(unsigned n = 0, e = size(); n < e; ++n)
65  {
66  SInfo &i1 = at(n);
67  const SInfo &i2 = other.at(n);
68 
69  i1.min = std::min(i1.min, i2.min);
70  i1.max = std::max(i1.max, i2.max);
71  }
72 }
73 
74 void Bounds::save_file(std::ofstream &out) const
75 {
76  out << size() << std::endl;
77  out.precision(file_precision);
78 
79  for(unsigned b = 0, e = size(); b < e; ++b)
80  out << at(b).min << "\t" << at(b).max << std::endl;
81 }
82 
83 void Bounds::load_file(std::ifstream &inp)
84 {
85  unsigned n;
86  inp >> n;
87 
88  resize(n);
89 
90  for(unsigned i = 0; i < n; ++i)
91  inp >> at(i).min >> at(i).max;
92 }
93 
/**
 * Opens a file and reads two consecutive Bounds sections from it.
 *
 * Both sections are read with the stream overload of load_file(), first
 * into from and then into to.
 *
 * @param path Path of the file to be read
 * @param from Receives the first Bounds section
 * @param to Receives the second Bounds section
 */
94 void Bounds::load_file(const std::string &path, Bounds &from, Bounds &to)
95 {
96  std::ifstream inp;
97  inp.open(path.c_str());
98 
// NOTE(review): the statement controlled by this check is not visible in
// this listing (the embedded numbering jumps from 99 to 101). It is
// presumably the error path for a file that could not be opened; as
// shown, the check would guard the imbue() call instead. Confirm against
// the original source.
99  if(!inp.is_open())
101 
// read numbers using the "C" locale
102  inp.imbue(std::locale("C"));
103 
104  from.load_file(inp);
105  to.load_file(inp);
106 
107  inp.close();
108 }
109 
111 {
112 }
113 
114 Data::Data(unsigned rows, unsigned cols) : std::vector<Row>(rows, Row(cols)), n_total(cols)
115 {
116  from.resize(cols);
117  to.resize(cols);
118 }
119 
121 {
122 }
123 
124 Data::Data(unsigned rows, unsigned cols, const double st[])
125  : std::vector<Row>(rows, Row(cols)), n_total(cols)
126 {
127  from.resize(cols);
128  to.resize(cols);
129 
130  for(unsigned r = 0; r < rows; ++r)
131  {
132  Row &a = at(r);
133 
134  for(unsigned c = 0; c < cols; ++c)
135  a[c] = st[r * cols + c];
136  }
137 }
138 
139 Data::Data(unsigned rows, unsigned cols, const Row &row)
140  : std::vector<Row>(rows, Row(cols)), n_total(cols)
141 {
142  from.resize(cols);
143  to.resize(cols);
144 
145  for(unsigned r = 0; r < rows; ++r)
146  {
147  Row &a = at(r);
148 
149  for(unsigned c = 0; c < cols; ++c)
150  a[c] = row[r * cols + c];
151  }
152 }
153 
/**
 * Builds a copy of the data without one of its columns.
 *
 * The returned object has n_total - 1 columns; the from / to bounds
 * entries of the removed column are left out as well.
 *
 * @param col Index of the column to be left out
 * @return The new Data object
 */
154 Data Data::drop_column(unsigned col) const
155 {
// NOTE(review): the statement controlled by this check is not visible in
// this listing (the embedded numbering jumps from 156 to 158). It is
// presumably the error path for an out-of-range column; as shown, the
// check would guard the declaration of new_data instead. Confirm against
// the original source.
156  if(col >= n_total)
158 
159  Data new_data;
160  new_data.reserve(size());
161  new_data.n_total = n_total - 1;
162 
163  /* bounds */
164  for(unsigned i = 0; i < n_total; ++i)
165  {
166  if(i != col)
167  {
168  new_data.from.push_back(from[i]);
169  new_data.to.push_back(to[i]);
170  }
171  }
172 
173  /* content */
174  for(unsigned i = 0, e = size(); i < e; ++i)
175  {
// rebuild the row value by value, skipping the dropped column
176  Row r;
177 
178  for(unsigned j = 0; j < n_total; ++j)
179  {
180  if(j != col)
181  r.push_back(at(i)[j]);
182  }
183  new_data.push_back(r);
184  }
185 
186  return new_data;
187 }
188 
190 {
191  random_shuffle(begin(), end());
192 }
193 
/**
 * Copies a contiguous range of rows into a new Data object.
 *
 * last - first rows are copied, starting at row first, so the row at
 * index last itself is not included. The from / to bounds are copied too.
 *
 * @param first Index of the first row to be copied
 * @param last Index one past the last row to be copied
 * @return The new Data object
 */
194 Data Data::sample(unsigned first, unsigned last) const
195 {
// the unsigned difference is stored in an int so that last < first can be
// detected as a negative count
196  const int n_rows = last - first;
197 
// NOTE(review): the statement controlled by this check is not visible in
// this listing (the embedded numbering jumps from 198 to 200). It is
// presumably the error path for an invalid range; as shown, the check
// would guard the declaration of new_data instead. Confirm against the
// original source.
198  if(n_rows < 0)
200 
201  Data new_data(n_rows, n_total);
202 
203  for(int i = 0; i < n_rows; ++i)
204  new_data[i] = at(first + i);
205 
206  new_data.from = from;
207  new_data.to = to;
208 
209  return new_data;
210 }
211 
213 {
214  push_back(Row(n_total));
215  return back();
216 }
217 
/**
 * Reads rows from a file and appends them to the data.
 *
 * The file starts with the number of rows and the number of values per
 * row, followed by the values themselves. n_total is overwritten with
 * the number of values per row, and both bounds vectors are resized to
 * it.
 *
 * @param path Path of the file to be read
 * @return Number of rows announced by the file header
 */
218 unsigned Data::load_file(const std::string &path)
219 {
220  std::ifstream inp;
221  inp.open(path.c_str());
222 
// NOTE(review): the statement controlled by this check is not visible in
// this listing (the embedded numbering jumps from 223 to 225). It is
// presumably the error path for a file that could not be opened; as
// shown, the check would guard the imbue() call instead. Confirm against
// the original source.
223  if(!inp.is_open())
225 
// read numbers using the "C" locale
226  inp.imbue(std::locale("C"));
227 
228  unsigned n_rows;
229  inp >> n_rows >> n_total;
230 
231  from.resize(n_total);
232  to.resize(n_total);
233 
// reject headers with no rows or fewer than two values per row
234  if(n_rows < 1 || n_total < 2)
235  {
236  inp.close();
// NOTE(review): a statement appears to be missing here as well (the
// embedded numbering jumps from 236 to 238). It is presumably the error
// path for invalid file contents; as shown, execution would continue
// with the closed stream. Confirm against the original source.
238  }
239 
// existing rows are kept; the new ones are appended after them
240  reserve(size() + n_rows);
241 
242  for(unsigned r = 0; r < n_rows; ++r)
243  {
244  Row row(n_total);
245 
246  for(unsigned i = 0; i < n_total; ++i)
247  inp >> row[i];
248 
249  push_back(row);
250  }
251 
252  inp.close();
253  return n_rows;
254 }
255 
/**
 * Saves the data values into a file.
 *
 * The first line holds the number of rows and n_total, separated by a
 * space. Each row is then written on its own line, every value followed
 * by a space.
 *
 * @param path Path of the file to be written
 */
256 void Data::save_file(const std::string &path) const
257 {
258  std::ofstream out;
259  out.open(path.c_str());
260 
// NOTE(review): the statement controlled by this check is not visible in
// this listing (the embedded numbering jumps from 261 to 263). It is
// presumably the error path for a file that could not be opened; as
// shown, the check would guard the imbue() call instead. Confirm against
// the original source.
261  if(!out.is_open())
263 
// write numbers using the "C" locale
264  out.imbue(std::locale("C"));
265 
266  out << size() << " " << n_total << std::endl;
267  out.precision(file_precision);
268 
269  for(unsigned r = 0, e = size(); r < e; ++r)
270  {
271  for(unsigned i = 0; i < n_total; ++i)
272  out << at(r)[i] << " ";
273 
274  out << std::endl;
275  }
276 
277  out.close();
278 }
279 
/**
 * Saves the data bounds into a file.
 *
 * The from bounds are written first, followed by the to bounds, both
 * through Bounds::save_file().
 *
 * @param path Path of the file to be written
 */
280 void Data::save_info_file(const std::string &path) const
281 {
282  std::ofstream out;
283  out.open(path.c_str());
284 
// NOTE(review): the statement controlled by this check is not visible in
// this listing (the embedded numbering jumps from 285 to 287). It is
// presumably the error path for a file that could not be opened; as
// shown, the check would guard the imbue() call instead. Confirm against
// the original source.
285  if(!out.is_open())
287 
// write numbers using the "C" locale
288  out.imbue(std::locale("C"));
289  out.precision(file_precision);
290 
291  from.save_file(out);
292  to.save_file(out);
293 
294  out.close();
295 }
296 
298 {
299  from.resize(n_total);
300 
301  for(unsigned n = 0; n < n_total; ++n)
302  {
303  double min = at(0)[n], max = at(0)[n];
304 
305  for(unsigned p = 1, e = size(); p < e; ++p)
306  {
307  const double v = at(p)[n];
308 
309  if(v < min)
310  min = v;
311  else if(v > max)
312  max = v;
313  }
314 
315  from[n] = SInfo(min, max);
316  }
317 
318  return from;
319 }
320 
321 void Data::scale_column(unsigned n, const SInfo &curv, const SInfo &newv)
322 {
323  for(unsigned p = 0, e = size(); p < e; ++p)
324  at(p)[n] = curv.scale(newv, at(p)[n]);
325 }
326 
328 {
329  for(unsigned c = 0; c < n_total; ++c)
330  scale_column(c, from[c], to[c]);
331 }
332 
333 void Data::scale(double min, double max)
334 {
335  for(unsigned c = 0; c < n_total; ++c)
336  {
337  to[c] = SInfo(min, max);
338  scale_column(c, from[c], to[c]);
339  }
340 }
341 
343 {
344  for(unsigned c = 0; c < n_total; ++c)
345  scale_column(c, to[c], from[c]);
346 }
347 
348 void Data::k_fold(unsigned n, unsigned k, Data &p, Data &l) const
349 {
350  for(unsigned r = 0, e = size(); r < e; ++r)
351  {
352  if(r % k == n)
353  p.push_back(at(r));
354  else
355  l.push_back(at(r));
356  }
357 }
358 
359 }
void k_fold(unsigned n, unsigned k, Data &p, Data &l) const
Fills two Data objects with complementary information, useful for cross-validation.
Definition: data.cpp:348
void scale_column(unsigned n, const SInfo &curv, const SInfo &newv)
Scales a single data column.
Definition: data.cpp:321
Bounds from
Original data bounds.
Definition: data.hpp:261
thrown when a file couldn't be opened
Definition: utils.hpp:45
Bounds & calc_bounds()
Calculates and sets the data bounds using the minimum and maximum values of each neuron.
Definition: data.cpp:297
Data sample(unsigned first, unsigned last) const
Samples the data.
Definition: data.cpp:194
void merge_with(const Bounds &other)
Merges the values from another Bounds with the current, retaining the limits.
Definition: data.cpp:62
void load_file(std::ifstream &inp)
Reads the scaling information from a file stream.
Definition: data.cpp:83
const unsigned file_precision
Precision used when writing floating point numbers to files.
Definition: utils.hpp:39
STL namespace.
virtual ~Bounds()
Definition: data.cpp:58
Minimum / maximum scaling information.
Definition: data.hpp:33
void save_file(const std::string &path) const
Erases the contents of a file and saves the Data values into it.
Definition: data.cpp:256
void scale()
Scales the data, neuron per neuron, using the current from and to bounds.
Definition: data.cpp:327
size_t n_total
Number of inputs + outputs per row.
Definition: data.hpp:267
thrown when calling a function with invalid parameters
Definition: utils.hpp:51
Bounds to
Scaled data bounds.
Definition: data.hpp:264
Data scaling information, for all input and output neurons.
Definition: data.hpp:47
double scale(const SInfo &to, double value) const
Definition: data.cpp:38
void save_file(std::ofstream &out) const
Appends the data bounds values into a file stream.
Definition: data.cpp:74
The exception can be identified by the type() method.
Definition: utils.hpp:69
double min
Definition: data.hpp:41
double max
Definition: data.hpp:41
void save_info_file(const std::string &path) const
Erases the contents of a file and saves the Data bounds info into it.
Definition: data.cpp:280
std::vector< double > Row
Data row.
Definition: data.hpp:90
Data used in training, validation and testing.
Definition: data.hpp:95
thrown when a file has invalid contents
Definition: utils.hpp:48
void descale()
Descales the data.
Definition: data.cpp:342
unsigned load_file(const std::string &path)
Reads values from a file and appends them into the data.
Definition: data.cpp:218
void shuffle()
Randomizes the positions of the rows.
Definition: data.cpp:189
Data drop_column(unsigned col) const
Returns a copy of the current object, without a column.
Definition: data.cpp:154
Row & add()
Adds a row to the data, returning a reference to it.
Definition: data.cpp:212
virtual ~Data()
Definition: data.cpp:120