Peter Ward avatar Peter Ward committed 2fd426d

add old-style lyrics parser

Comments (0)

Files changed (1)

+from collections import namedtuple
+import re
+
+from utils import group_paragraphs
+
# A line that refers back to an earlier verse: "(Label)" or "(Label) x3".
# Group 1 is the label, group 2 the optional total count.
VERSE_REFERENCE = re.compile(r'^\((.*)\)(?: x([0-9]+))?$')

# A first line that names a verse: "Label:" or "Label (Parent):".
# A doubled trailing colon ("text::") deliberately does not match.
VERSE_LABEL = re.compile(r'^(.*?[^:])(?: \((.*?)\))?:$')

# A line that marks the verse as repeated: "(repeat)" or "(repeat x3)".
# Anchored at both ends like the patterns above, so that a lyric line which
# merely starts with "(repeat)" is not mistaken for the marker and dropped.
REPEAT_INDICATOR = re.compile(r'^\(repeat(?: x([0-9]+))?\)$')

# Info keys whose values are given as bare lines, in this order, at the top
# of the info section (every later info line is written "key: value").
INITIAL_INFO = ('title', 'authors')
+
class LyricsFormatError(ValueError):
    '''Raised when a lyrics file does not follow the expected format.'''
+
# One verse of a song:
#   label     - name of the verse, or None if it has none
#   parent    - label of a related, earlier verse, or None
#   content   - the verse text, lines joined with '\n'
#   n_repeats - number of additional times the verse is repeated
Verse = namedtuple('Verse', ['label', 'parent', 'content', 'n_repeats'])
+
def parse_info(lines, initial=INITIAL_INFO):
    '''Parse the info section of a lyrics file.

    The section starts with one bare value line per key in `initial`, in
    order. Every following line must look like "key: value"; keys are
    lower-cased before being stored.

    `lines` is any iterable of lines (without trailing newlines).

    Returns a dict mapping each key to its value.

    Raises LyricsFormatError if an initial line is missing, if a line is not
    in "key: value" form, or if a key is given more than once (including a
    key that was already supplied as one of the initial lines).
    '''
    # Accept lists as well as iterators; next() below needs an iterator.
    lines = iter(lines)
    info = {}

    # Read the initial lines of the file.
    for key in initial:
        try:
            value = next(lines)
        except StopIteration:
            raise LyricsFormatError("Missing %s line." % (key,))

        info[key] = value

    # Each line after that looks like "key: value".
    for line in lines:
        key, separator, value = line.partition(': ')
        if not separator:
            raise LyricsFormatError(
                'Info lines should be in the format "key: value"'
            )

        key = key.lower()

        if key in initial:
            raise LyricsFormatError(
                "%s has been defined (implicitly)." % (key,)
            )

        if key in info:
            raise LyricsFormatError(
                "%s has already been defined." % (key,)
            )

        info[key] = value

    return info
+
def normalise_label(label):
    '''Return `label` in title case; None is passed through unchanged.'''
    return None if label is None else label.title()
+
def parse_n_repeats(n, default=1):
    '''Convert a repeat count to an int, or return `default` if `n` is None.'''
    return default if n is None else int(n)
+
def parse_verses(paragraphs):
    """
    Parse the remainder of the lyrics file.

    If the verse is prefixed with "some string:", then this is treated as a
    verse label, so that it can be reused later.

    To allow putting a colon at the end of the first line of a verse, if
    there are two colons, e.g., "some string::", then a single colon is
    removed, and it is not treated as a verse label.

    In addition, if the verse label looks like "some string (another one):",
    then "another one" is recorded as the parent of the verse, denoting that
    the verse is similar to that one. This is only used to denote semantics,
    it doesn't affect the behaviour of the output.

    If the verse consists of the line "(some string)", and "some string" was
    previously defined as a verse label, then the previous verse is inserted.
    "(some string) xN" inserts it with n_repeats set to N - 1. Several
    reference lines may share a paragraph, but nothing else may appear in it.

    If the last line of a verse is "(repeat)" or "(repeat xN)", that line is
    removed and n_repeats is set to 1 or N respectively.

    Labels are passed through normalise_label() before being stored or
    looked up.

    The input is an iterable of paragraphs, each of which is an iterable of
    lines (without trailing newlines), and the output is an iterator of
    Verse(label, parent, content, n_repeats) tuples.

    Raises LyricsFormatError if a reference names a verse that has not been
    defined, if the same verse is referenced twice in a row, or if a
    reference paragraph contains a line that is not a reference.
    """

    # mapping of verse label -> text
    verses = {}

    # we keep track of the last verse to be included via reference,
    # so we can complain loudly when you write something like:
    # (Chorus)
    # (Chorus)
    # instead of
    # (Chorus) x2
    # This may seem petty, but it makes my life saner.
    last_ref = None

    for lines in paragraphs:
        # accept lists as well as iterators; next() needs an iterator
        lines = iter(lines)

        # An empty paragraph has nothing to parse. Skipping it also keeps a
        # StopIteration from escaping this generator, which would silently
        # end it on Python 2 and raise RuntimeError on Python 3.7+.
        first_line = next(lines, None)
        if first_line is None:
            continue

        # is this a reference to a previous verse?
        if VERSE_REFERENCE.match(first_line):
            # we allow multiple references in the same paragraph, so every
            # line (the first one included) goes through the same checks
            line = first_line
            while line is not None:
                m = VERSE_REFERENCE.match(line)
                if not m:
                    raise LyricsFormatError(
                        "Found non-reference in the same paragraph as a "
                        "reference."
                    )

                ref = normalise_label(m.group(1))
                # "x2" means sung twice, i.e. one additional repeat
                n_repeats = parse_n_repeats(m.group(2)) - 1

                if ref == last_ref:
                    raise LyricsFormatError(
                        "Repeated reference to verse, don't do that."
                    )
                last_ref = ref

                # report an unknown label as a format error rather than
                # letting a bare KeyError escape
                if ref not in verses:
                    raise LyricsFormatError(
                        "Reference to undefined verse %r." % (ref,)
                    )

                yield Verse(ref, ref, verses[ref], n_repeats)

                line = next(lines, None)

            continue

        last_ref = None

        label = parent = None
        paragraph = []

        # otherwise, see if this verse has a label on it
        m = VERSE_LABEL.match(first_line)

        if m:
            label = normalise_label(m.group(1))
            parent = normalise_label(m.group(2))

        else:
            # if not, remove a single colon from the end
            # (see docstring for why)
            if first_line.endswith(':'):
                first_line = first_line[:-1]
            paragraph.append(first_line)

        # add unparsed lines onto the end of the paragraph
        paragraph.extend(lines)

        n_repeats = 0

        # check if the verse ends with "(repeat xN)"
        if paragraph:
            m = REPEAT_INDICATOR.match(paragraph[-1])
            if m:
                # if so, remove that line, and set n_repeats
                paragraph.pop()
                n_repeats = parse_n_repeats(m.group(1))

        content = '\n'.join(paragraph)
        if label:
            # remember the text so that later references can reuse it
            verses[label] = content

        yield Verse(label, parent, content, n_repeats)
+
def parse(lines):
    '''Parse a whole lyrics file.

    `lines` is an iterable of lines; surrounding whitespace is stripped from
    each one before it is grouped into paragraphs by group_paragraphs()
    (imported from utils; its exact grouping rule is not visible here).

    The first paragraph is parsed with parse_info(), the rest with
    parse_verses().

    Returns a tuple (info, verses), where `info` is a dict and `verses` is a
    lazy iterator of Verse tuples; format errors in the verses are therefore
    only raised once that iterator is consumed.

    Raises LyricsFormatError if there is no info paragraph at all, or if the
    info paragraph is malformed.
    '''
    stripped = (line.strip() for line in lines)
    paragraphs = group_paragraphs(stripped)

    # Only the fetch of the first paragraph is guarded, so that an error
    # coming out of parse_info() can never be mistaken for an empty file.
    try:
        info_lines = next(paragraphs)
    except StopIteration:
        raise LyricsFormatError("No info paragraph (is the file empty?)")

    return parse_info(info_lines), parse_verses(paragraphs)
+
def dumps(info, verses, initial=INITIAL_INFO):
    '''Serialise song info and verses back into lyrics-file text.

    `info` is a dict of info values; it must contain every key in `initial`
    and is not modified. The values for the `initial` keys are written first
    as bare lines, in order, followed by the remaining entries as
    "key: value" lines sorted by key.

    `verses` is an iterable of (label, parent, content, n_repeats) tuples.
    Each verse is written as its own paragraph, preceded by an empty line:

    * a verse whose label equals its parent and whose content matches a
      verse already written under that label is written as a reference,
      "(Label)" or "(Label) xN" with N = n_repeats + 1;
    * otherwise the verse is written out in full, with a "Label:" or
      "Label (Parent):" line first if it has a label, and a "(repeat)" or
      "(repeat xN)" line last if n_repeats is positive.

    Returns the text as one string, with no trailing newline.

    Raises KeyError if `info` lacks one of the `initial` keys.
    '''
    output = []
    write = output.append

    # dump out the info header; `info` is only read, never popped from, so
    # the caller's dict stays intact and can be dumped again
    for key in initial:
        value = info[key]
        assert '\n' not in value
        write(value)

    for key, value in sorted(info.items()):
        if key in initial:
            # already written above as a bare line
            continue
        assert '\n' not in value
        write('%s: %s' % (key, value))

    # label -> content of the verses written out in full so far
    previous = {}

    for label, parent, content, n_repeats in verses:
        write('')

        # check if this is a reference to a previously printed verse
        if (
            label is not None and
            label == parent and
            content == previous.get(parent)
        ):
            line = '(' + parent + ')'
            if n_repeats > 0:
                line += ' x%d' % (n_repeats + 1)
            write(line)
            continue

        text = content

        # otherwise, if it has a label, print it
        if label is not None:
            line = label
            # with the parent if it has one
            if parent is not None:
                line += ' (' + parent + ')'
            line += ':'
            write(line)
        else:
            # Without a label line, the first line of the verse opens the
            # paragraph. If it ends in a colon it would be read back as a
            # label, so double the colon; the parser strips one again.
            head, newline, rest = content.partition('\n')
            if head.endswith(':'):
                text = head + ':' + newline + rest

        # then print the verse itself
        write(text)

        # then the (repeat xN) label
        if n_repeats == 1:
            write('(repeat)')
        elif n_repeats > 1:
            write('(repeat x%d)' % n_repeats)

        # remember the unescaped content for later reference detection
        previous[label] = content

    return '\n'.join(output)
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.