Commits

David McClosky committed 1950584

Add selectcsvcolumns

Comments (0)

Files changed (1)

+#!/usr/bin/env python
+"""
+selectcsvcolumns: select only certain columns from CSV files
+(each CSV file can have a different header)
+
+Usage: selectcsvcolumns list-of-columns csv-filenames
+
+    list-of-columns is a comma-separated list of column names you'd like to extract, e.g. a,b,c
+    If these names include spaces, you'll need to quote the whole list:
+
+        selectcolumns "a name with spaces,another one,and so on" filename1 filename2
+
+    If these names include commas, you're screwed.
+
+    Lots of error checking is missing, but basic functionality is here.
+"""
+
+# Author: David McClosky
+# Homepage: http://zorglish.org
+# Code: http://code.zorglish.org
+
+import sys, csv
+
+if len(sys.argv) < 3:
+    print __doc__.strip()
+    sys.exit(1)
+
+columns = sys.argv[1].split(',')
+output_csv_writer = csv.writer(sys.stdout)
+output_csv_writer.writerow(columns)
+
+for filename in sys.argv[2:]:
+    reader = csv.reader(file(filename))
+    # assume first line is a header and learn the names of columns from it
+    header = reader.next()
+    header_mapping = dict((v, k) for (k, v) in enumerate(header))
+    for row in reader:
+        # pick out only the columns that people asked for
+        selected_row = [row[header_mapping[column]] for column in columns]
+        output_csv_writer.writerow(selected_row)