Source

csv2textable / csv2textable.py

Full commit
#!/usr/bin/env python
import os
import re
import sys
import shutil
import subprocess
import logging
import csv
from optparse import OptionParser


# Diagnostics go to stderr, wrapped in ANSI yellow, so they stay separate
# from the LaTeX that is printed to stdout.
logging.basicConfig(
    format='\x1b[33m%(message)s\x1b[0m',
    level=logging.DEBUG,
    stream=sys.stderr,
)

# Command line: one positional CSV path plus optional header/column selectors.
usage = """usage: %prog [options] <csv-file>"""
parser = OptionParser(usage=usage)
parser.add_option(
    "-r", "--header",
    dest="header",
    type="int",
    help="Header line number",
)
parser.add_option(
    "-c", "--cols",
    dest="cols",
    help="Only include specified columns i.e. 1,10,13",
)
# Parsed once at import time; main() and read_csv() read these globals.
options, pos_args = parser.parse_args()


def main():
    """Convert the CSV file named on the command line into a LaTeX tabular.

    Reads the module-level ``options`` and ``pos_args``. The table is emitted
    line by line through ``write``. When the number of positional arguments
    is not exactly one, a message and the usage line are printed and the
    process exits with status -1.
    """
    if len(pos_args) != 1:
        sys.stderr.write("CSV-formatted proposal.\n\n")
        parser.print_usage()
        sys.exit(-1)
    csv_name = pos_args[0]
    logging.debug('converting: {0}'.format(csv_name))
    if options.header and options.header > 0:
        # The option is 1-indexed; read_csv compares against 0-indexed rows.
        options.header -= 1
    # Close the input file deterministically instead of leaking the handle.
    with open(csv_name) as fh:
        header, lines = read_csv(fh)
    if options.cols:
        # Build a real list: the isinstance checks here and in row() need one,
        # and map() only returns a list on Python 2.
        cols = [int(col) for col in options.cols.split(',')]
        logging.info('specified columns: %s' % cols)
    else:
        cols = longest_row(lines)
    # write latex
    # One left-aligned column per selected index, or per cell of the widest row.
    if isinstance(cols, list):
        align = 'l' * len(cols)
    else:
        align = 'l' * cols
    write(r"\begin{tabular}{%s}" % align)
    if header:
        row(header, cols)
        write(r'\toprule')
    for line in lines:
        row(line, cols)
    write(r"\end{tabular}")


def clean_cell(cell):
    """Return ``cell`` with LaTeX-special characters escaped.

    ``&`` and ``%`` are backslash-escaped and padded with spaces, ``_`` and
    ``$`` are backslash-escaped, and ``<`` / ``>`` are wrapped in inline math.
    """
    # Order matters: a literal '$' must be escaped before '<' and '>'
    # introduce math-mode '$' delimiters of their own.
    substitutions = (
        ('&', r' \& '),
        ('_', r'\_'),
        ('%', r' \% '),
        ('$', r'\$'),
        ('<', r'$<$'),
        ('>', r'$>$'),
    )
    for raw, escaped in substitutions:
        cell = cell.replace(raw, escaped)
    return cell


def longest_row(lines):
    """Return the number of cells in the longest row of ``lines`` (0 if empty)."""
    widths = [len(record) for record in lines]
    # max() rejects an empty sequence, so fall back to a single zero.
    return max(widths or [0])


def section_row(data):
    """Return True when every cell after the first one is empty (falsy)."""
    for cell in data[1:]:
        if cell:
            return False
    return True


def row(data, cols):
    """Emit one CSV record as a LaTeX table row through ``write``.

    ``cols`` is either a list of column indices to keep (indices missing from
    the record become empty cells) or an integer width that shorter records
    are padded to. With a column list, a record whose selected cells are all
    empty after the first is emitted as a bold multicolumn section heading.
    """
    # Materialise a list so cells can be indexed and extended on any Python
    # version (map() is lazy on Python 3).
    data = [clean_cell(cell) for cell in data]
    if isinstance(cols, list):
        subset = []
        for i in cols:
            try:
                subset.append(data[i])
            except IndexError:
                # Short record: keep the column count stable with an empty cell.
                subset.append('')
        data = subset
        if section_row(data):
            logging.debug('section: %s' % data[0])
            data = [r'\multicolumn{%s}{l}{\textbf{%s}}' % (len(cols), data[0])]
    else:
        # Pad short records; longer ones are left as they are.
        if len(data) < cols:
            data.extend([''] * (cols - len(data)))
    write(' & '.join(data) + r' \\')


def write(data):
    """Print ``data`` to standard output followed by a newline."""
    # The parenthesised single-argument form behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print(data)


def read_csv(fh):
    """Parse an open, seekable CSV file into a header row and data rows.

    The dialect is sniffed from the first 1024 characters, falling back to
    ``csv.excel`` when sniffing fails. When the module-level
    ``options.header`` is set, it is used as the 0-indexed header row: rows
    before it are discarded and the header row itself is returned separately.

    Returns:
        A ``(header, lines)`` tuple; ``header`` is ``[]`` when no header row
        was requested or the file ends before reaching it.
    """
    try:
        dialect = csv.Sniffer().sniff(fh.read(1024))
    except csv.Error:
        dialect = csv.excel
    fh.seek(0)
    # Parse with the detected dialect; previously it was sniffed but ignored.
    csv_reader = csv.reader(fh, dialect)
    header_index = options.header
    lines = []
    header = []
    for i, line in enumerate(csv_reader):
        # Compare against None rather than truthiness: index 0 (a header on
        # the first line) is falsy and would otherwise never be recognised.
        if header_index is not None and i <= header_index:
            if i == header_index:
                logging.debug('headers: {0}'.format(', '.join(line)))
                header = line
            continue
        lines.append(line)
    return header, lines


if __name__ == "__main__":
    # Script entry point: run the conversion, then exit with a success status.
    main()
    raise SystemExit(0)