1. James Tocknell
  2. shinypress


shinypress / shinypress / lyrics / parser.py

from shinypress.utils import group_paragraphs

class LyricsFormatError(ValueError):
    """
    Raised when the text being parsed does not follow the lyrics format.
    """

def parse_info(lines):
    """
    Parse the first part of the lyrics format (the info header).

    The first two lines are taken verbatim as the title and the authors.
    Every remaining line must be in the format "key: value"; keys are
    lower-cased before being stored, values are kept as they are.

    `lines` may be any iterable of strings and is consumed by this function.

    Returns a dictionary mapping each key ('title', 'authors' and any
    further keys found) to its value.

    Raises LyricsFormatError if the title or authors line is missing, if a
    line is not in the "key: value" format, or if a key clashes with one of
    the special keys or is given more than once.
    """
    special = ['title', 'authors']

    # Accept any iterable (e.g. a list), not only objects that already
    # implement the iterator protocol.
    lines = iter(lines)

    info = {}

    # The special keys are positional: they have no "key: " prefix.
    for key in special:
        try:
            value = next(lines)
        except StopIteration:
            raise LyricsFormatError("Missing %s line." % (key,))

        info[key] = value

    for line in lines:
        try:
            # Split on the first separator only, so values may contain ": ".
            key, value = line.split(': ', 1)
        except ValueError:
            raise LyricsFormatError(
                'Info lines should be in the format "key: value"')

        key = key.lower()

        # Checked before the generic duplicate test so that special keys get
        # the more specific message.
        if key in special:
            raise LyricsFormatError(
                "%s has been defined (special key)." % (key,))

        if key in info:
            raise LyricsFormatError("%s has already been defined." % (key,))

        info[key] = value

    return info

import re
# Raw strings keep the regex backslashes out of Python's own string escaping.

# A line of the form "(Label)"; group 1 is the text between the parentheses.
VERSE_REFERENCE = re.compile(r'^\((.*)\)$')
# A line of the form "Label:"; group 1 is the text before the colon.  A line
# ending in more than one colon does not match.
VERSE_LABEL = re.compile(r'^(.*[^:]):$')

def parse_lyrics(lines):
    """
    Given the lines of a lyrics file, interpret the lyrics according to the
    lyrics format (exemplified in format.txt).

    Every line is stripped of surrounding whitespace before it is parsed, so
    lines may be passed with or without their trailing newline characters.

    The result is a tuple (info, verses):

      * info is the dictionary built by parse_info() from the first
        paragraph (title, authors, copyright, etc.).
      * verses is an iterator over the remaining paragraphs in file order,
        with references expanded.  Each verse is a string with unix newline
        characters using the Pango markup language [1] (as provided in the
        original source).

    A paragraph whose first line is "Label:" is remembered under that label;
    a paragraph made up of "(Label)" lines yields the verses remembered under
    those labels.  Labels are title-cased before being stored or looked up.
    "(Repeat)" refers to the most recent non-reference verse.

    Raises LyricsFormatError if there is no info paragraph.  While the verses
    iterator is being consumed, LyricsFormatError is also raised for a
    reference paragraph containing a line that is not a reference, and for a
    reference to a label that has not been defined.

    [1] http://library.gnome.org/devel/pango/stable/PangoMarkupFormat.html
    """
    lines = (line.strip() for line in lines)
    paragraphs = iter(group_paragraphs(iter(lines)))

    try:
        header = next(paragraphs)
    except StopIteration:
        raise LyricsFormatError("No info paragraph (empty file?!).")

    info = parse_info(header)

    def _resolve(verses, line):
        # Return the stored verse that the "(Label)" line refers to.
        match = VERSE_REFERENCE.match(line)
        if match is None:
            raise LyricsFormatError(
                'Expected a verse reference like "(Label)", got "%s".'
                % (line,))

        name = match.group(1).title()
        try:
            return verses[name]
        except KeyError:
            raise LyricsFormatError(
                "Reference to undefined verse %s." % (name,))

    def _iter():
        # Maps title-cased labels to the content of the verse they name.
        verses = {}

        for verse in paragraphs:
            verse = iter(verse)

            # A StopIteration escaping from next() inside a generator would
            # end it silently (or raise RuntimeError on Python 3.7+), so an
            # empty paragraph is skipped explicitly.
            first_line = next(verse, None)
            if first_line is None:
                continue

            if VERSE_REFERENCE.match(first_line):
                # A reference paragraph only re-emits stored verses; it has
                # no content of its own and must not touch the repeat slot.
                yield _resolve(verses, first_line)

                for line in verse:
                    yield _resolve(verses, line)

                continue

            paragraph = []

            label = VERSE_LABEL.match(first_line)
            if label:
                # The label line names the verse and is not part of it.
                label = label.group(1).title()
            else:
                # NOTE(review): presumably a doubled trailing colon escapes a
                # literal one so the line is not read as a label - confirm
                # against format.txt.
                if first_line.endswith(':'):
                    first_line = first_line[:-1]
                paragraph.append(first_line)

            content = '\n'.join(paragraph + list(verse))
            if label:
                verses[label] = content
            yield content

            # Stored title-cased, because references are title-cased before
            # they are looked up.
            verses['Repeat'] = content

    return info, _iter()