# RPTParser / rptparser.py

import re

column_pattern = re.compile('([A-Za-z]+)')

class RPTParser(object):
    """Parser for fixed-width .rpt report files (SQL Server style).

    The expected file layout is:
        line 1: whitespace-separated column names
        line 2: runs of dashes ('---- --- ...') whose widths define the
                fixed column widths for every data row
        line 3+: data rows, one record per line

    Call parse() first; it stores the result in self.parsed. to_sql()
    then renders the parsed data as a single INSERT statement.
    """

    # Column headers may carry punctuation/ordinals; keep the alphabetic part.
    _COLUMN_PATTERN = re.compile(r'[A-Za-z]+')

    def __init__(self, filename):
        # Path of the .rpt file to parse; read lazily by parse().
        self.filename = filename

    def parse(self):
        """Parse the .rpt file.

        Returns (and stores in self.parsed) a dict:
            {'columns': [name, ...], 'rows': [[value, ...], ...]}
        Raises OSError if the file cannot be opened.
        """
        columns = []
        column_lengths = []
        rows = []
        with open(self.filename) as f:
            # Header line: extract the alphabetic column names.
            raw_columns = f.readline().split(' ')
            for column in raw_columns:
                column = column.strip()
                if column:
                    columns.append(self._COLUMN_PATTERN.findall(column)[0])

            # Separator line of dashes: each dash run's length (+1 for the
            # separating space) is the fixed width of that column.
            lengths = f.readline().split(' ')
            for i in range(len(columns)):
                column_lengths.append(len(lengths[i]) + 1)

            # Data rows: slice each line into fixed-width cells.
            more_lines = True
            while more_lines:
                line = f.readline()
                if not line:
                    break

                char = 0
                row = []
                for length in column_lengths:
                    old_char = char
                    char = char + length
                    value = line[old_char:char].strip()

                    # A completely empty cell marks the end of the data
                    # (blank line / trailing footer); stop reading.
                    if not value:
                        more_lines = False
                        break
                    row.append(value)

                # Don't add partial/empty rows produced by the stop above.
                if row and len(row) == len(column_lengths):
                    rows.append(row)

        self.parsed = {
            'columns': columns,
            'rows': rows,
        }
        return self.parsed

    def to_sql(self, table, map_dict=None):
        """Render the parsed data as one SQL INSERT statement.

        map_dict, if given, maps .rpt column names to output column
        names, e.g.::

            'Id': 'id',
            'VehicleId': 'car_id',

        Columns missing from map_dict are ignored in the output (both
        the column name and the corresponding row values are dropped).
        If map_dict is not provided, columns keep their original names.

        parse() must have been called first; otherwise self.parsed is
        missing and AttributeError is raised.
        """
        columns = self.parsed['columns']

        # Replace column names; unmapped columns become None (skipped).
        if map_dict:
            new_columns = []
            for column in columns:
                if column in map_dict:
                    new_columns.append(map_dict[column])
                else:
                    new_columns.append(None)
            columns = new_columns

        # Insert query:
        query = ['INSERT INTO %s (' % table]

        # Comma separated column names (None entries are dropped):
        query_columns = []
        for column in columns:
            if column:
                query_columns.append(column)
        query.append(', '.join(query_columns))

        # Rows:
        query.append(') VALUES ')
        rows = self.parsed['rows']

        query_rows = []
        for row in rows:
            query_row = []
            i = 0
            for value in row:
                # Skip values for None columns as specified in map_dict:
                if not columns[i]:
                    i += 1
                    continue
                i += 1

                # Numbers go in bare; everything else is quoted.
                try:
                    float(value)
                    query_row.append(value)
                except ValueError:
                    query_row.append('"%s"' % value)

            # Comma between rows, semicolon on the last row:
            new_row = ', '.join(query_row)
            if row != rows[-1]:
                query_rows.append('(%s),' % new_row)
            else:
                query_rows.append('(%s);' % new_row)

        query.append(' '.join(query_rows))
        return ''.join(query)