stevejb avatar stevejb committed 5115bb5

This shows how to use Boost to easily parse CSV files.

Comments (0)

Files changed (3)

test_csv_parse/data.csv

+1.123,2.5234233,3.1212334,2
+4,5,6.12312312,3
+7.54321,8.1000000112312,9,4

test_csv_parse/data2.csv

+1,2,3,4
+5 , 7, 1 , 9
+1 , 2 ,3,1
+1,2,3,4,5,6,7,8
+1,2,3,"4"
+"123"
+

test_csv_parse/test_csv_parse.cpp

+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include "mkl.h"
+#include "math.h"
+#include <vector>
+#include <cmath>
+#include <string>
+#include <cstdlib>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+
+#define EIGEN_USE_MKL_BLAS
+#define EIGEN_USE_MKL_LAPACKE
+#define NDEBUG
+
+#ifndef PI
+#define PI 3.141592653589793
+#endif
+
+
+#include <Eigen/Dense>
+
+#include <boost/tokenizer.hpp>
+
+
+using namespace boost;
+using namespace Eigen;
+using namespace std;
+
+// Dynamically sized matrix of doubles stored in row-major order.
+typedef Eigen::Matrix<double,Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> MatrixRMXd;
+
+
+
+////////////////////////////////////////
+// PARSE THE OPTIONS
+//
+// Scans the command line with getopt_long_only() and copies the argument
+// of the "input" option into `fname`.
+//
+//   argc, argv : the argument vector exactly as received by main()
+//   fname      : output; overwritten with the value supplied to the
+//                "input" option, left untouched when the option is absent
+//
+// Every other option that getopt accepts is consumed and ignored.
+void parseTheOptions(int argc, char *argv[], 
+                     string& fname) {
+  cout << "This is the argument parsing function" << endl;
+
+  // The only long option: "input" requires an argument and is reported
+  // by getopt under the code 'u'.
+  static struct option long_options[] = {
+    {"input"    , required_argument, 0, 'u' },
+    {0,0,0,0}
+  };
+
+  // process the command line arguments
+  for (;;) {
+    int option_index = 0;
+    const int c = getopt_long_only(argc, argv, "abc:d:012",
+                                   long_options, &option_index);
+
+    // -1 signals that every option has been consumed
+    if (c == -1)
+      break;
+
+    if (c == 'u')
+      fname = optarg;
+  } // end option loop
+}  // end opt parse
+
+
+////////////////////////////////////////
+// parse csv file into a Matrix of Integers
+// assumes no header
+//
+// Each line is split with boost's escaped_list_separator tokenizer; every
+// field is converted with strtod() and rounded to the nearest integer.
+//
+// NOTE: assumes that the "right" number of columns is the number
+//       of columns in the first row. Rows with a different number of
+//       columns are reported on stdout and skipped.
+//
+// Returns:
+//   - a zero-filled 1x1 matrix when the file cannot be opened
+//   - an empty (0x0) matrix when the file contains no lines
+//   - otherwise a (good rows) x (columns of first row) matrix
+MatrixXi parseCSVfile_int(string infilename) {
+
+    ifstream in(infilename.c_str());
+    // Keep the 1x1 placeholder shape on failure, but zero it so callers
+    // never read uninitialized memory.
+    if (!in.is_open()) return MatrixXi::Zero(1,1);
+
+    typedef tokenizer< escaped_list_separator<char> > Tokenizer;
+
+    // Tokenize every line up front; the column count is only known once
+    // the first row has been seen.
+    vector< vector< string > > matrows;
+    string line;
+    while (getline(in,line))
+    {
+        Tokenizer tok(line);
+        matrows.push_back(vector< string >(tok.begin(), tok.end()));
+    }
+    in.close();
+
+    // An empty file has no first row to take the column count from.
+    if (matrows.empty()) {
+        cout << "INPUT MATRIX: 0x0" << endl;
+        return MatrixXi(0,0);
+    }
+
+    // FIGURE OUT HOW MANY OF THE ROWS HAVE THE RIGHT NUMBER
+    // OF COLUMNS
+    const int Nrows = static_cast<int>(matrows.size());
+    const int Ncols = static_cast<int>(matrows[0].size());
+    int Ngoodrows = 0;
+    for(int i = 0; i < Nrows; i++) {
+      if(static_cast<int>(matrows[i].size()) == Ncols) {
+        Ngoodrows++;
+      }
+    }
+
+
+    // TRANSFORM THE VECTOR OF ROWS INTO AN EIGEN INTEGER MATRIX
+    MatrixXi xmat = MatrixXi(Ngoodrows, Ncols);
+    cout << "INPUT MATRIX: " << Nrows << "x" << Ncols << endl;
+    int rc = 0;   // next output row; advances only for accepted rows
+    for(int i = 0; i < Nrows; i++) {
+      if(static_cast<int>(matrows[i].size()) != Ncols) {
+        cout << "Row " << i << " has bad column count" << endl;
+        continue;
+      }
+      for(int j = 0; j < Ncols; j++) {
+        xmat(rc,j) = int(round(strtod(matrows[i][j].c_str(), NULL)));
+      }
+      rc++;
+    }
+
+
+    return(xmat);
+}
+
+
+////////////////////////////////////////
+// parse csv file into a row-major Matrix of doubles
+// assumes no header
+//
+// Each line is split with boost's escaped_list_separator tokenizer; every
+// field is converted with strtod().
+//
+// NOTE: assumes that the "right" number of columns is the number
+//       of columns in the first row. Rows with a different number of
+//       columns are reported on stdout and skipped.
+//
+// Returns:
+//   - a zero-filled 1x1 matrix when the file cannot be opened
+//   - an empty (0x0) matrix when the file contains no lines
+//   - otherwise a (good rows) x (columns of first row) matrix
+MatrixRMXd parseCSVfile_dec(string infilename) {
+
+    ifstream in(infilename.c_str());
+    // Keep the 1x1 placeholder shape on failure, but zero it so callers
+    // never read uninitialized memory.
+    if (!in.is_open()) return MatrixRMXd::Zero(1,1);
+
+    typedef tokenizer< escaped_list_separator<char> > Tokenizer;
+
+    // Tokenize every line up front; the column count is only known once
+    // the first row has been seen.
+    vector< vector< string > > matrows;
+    string line;
+    while (getline(in,line))
+    {
+        Tokenizer tok(line);
+        matrows.push_back(vector< string >(tok.begin(), tok.end()));
+    }
+    in.close();
+
+    // An empty file has no first row to take the column count from.
+    if (matrows.empty()) {
+        cout << "INPUT MATRIX: 0x0" << endl;
+        return MatrixRMXd(0,0);
+    }
+
+    // FIGURE OUT HOW MANY OF THE ROWS HAVE THE RIGHT NUMBER
+    // OF COLUMNS
+    const int Nrows = static_cast<int>(matrows.size());
+    const int Ncols = static_cast<int>(matrows[0].size());
+    int Ngoodrows = 0;
+    for(int i = 0; i < Nrows; i++) {
+      if(static_cast<int>(matrows[i].size()) == Ncols) {
+        Ngoodrows++;
+      }
+    }
+
+
+    // TRANSFORM THE VECTOR OF ROWS INTO AN EIGEN MATRIX OF DOUBLES
+    MatrixRMXd xmat = MatrixRMXd(Ngoodrows, Ncols);
+    cout << "INPUT MATRIX: " << Nrows << "x" << Ncols << endl;
+    int rc = 0;   // next output row; advances only for accepted rows
+    for(int i = 0; i < Nrows; i++) {
+      if(static_cast<int>(matrows[i].size()) != Ncols) {
+        cout << "Row " << i << " has bad column count" << endl;
+        continue;
+      }
+      for(int j = 0; j < Ncols; j++) {
+        xmat(rc,j) = strtod(matrows[i][j].c_str(), NULL);
+      }
+      rc++;
+    }
+
+
+    return(xmat);
+}
+
+
+
+// Program entry point: takes the CSV path from the command line, then
+// parses and prints that file twice, first as an integer matrix and then
+// as a matrix of doubles.
+int main(int argc, char **argv) 
+{
+    // Filled in by the option parser; stays empty if no input was given.
+    string fname;
+    parseTheOptions(argc, argv, fname);
+
+    // Integer view of the file.
+    MatrixXi intMatrix = parseCSVfile_int(fname);
+    cout << "PARSED INT: " << endl
+         << intMatrix << endl << endl;
+
+    // Floating-point view of the same file, shown with 10 significant digits.
+    MatrixRMXd decMatrix = parseCSVfile_dec(fname);
+    cout << "PARSED DEC: " << endl
+         << setprecision(10) << decMatrix << endl << endl;
+
+    return 0;
+}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.