Source

ophelia / ophelia / input.py

Full commit
# Copyright (c) 2006-2008 Thomas Lotze
# See also LICENSE.txt

import re

import zope.interface

import ophelia.interfaces


# Matches an XML declaration "<?xml ... ?>".  Group 1 is the complete
# declaration, group 2 is everything between "<?xml" and "?>" (the
# pseudo-attributes such as version and encoding).
XML_DECLARATION = re.compile(r"(<\?xml([^<>]*)\?>)")

# Matches an encoding hint, either as found in an XML declaration
# (encoding="utf-8" or encoding='utf-8') or in a Python coding comment
# (# -*- coding: utf-8 -*-).  Group 1 is the bare encoding name.  Both quote
# characters are accepted since XML allows either around the encoding name.
CODING_PATTERN = re.compile(r"""coding[=:]\s*["']?\s*([-\w.]+)\s*["']?""")


class Splitter(object):
    """Callable that separates file content into Python script and template.

    The part of the content preceding the first <?xml ... ?> declaration is
    the script, the part following it is the template.

    Instantiate as Splitter(**options). Recognised options are
    "script_encoding" and "template_encoding"; each defaults to "ascii".
    """

    zope.interface.implements(ophelia.interfaces.ISplitterAPI)

    # Fallback encodings, used when the content itself does not name one.
    script_encoding = None
    template_encoding = None

    def __init__(self, **options):
        """Store the fallback encodings for script and template.

        options: keyword arguments; "script_encoding" and
                 "template_encoding" are read, both default to "ascii"
        """
        self.script_encoding = options.get("script_encoding", "ascii")
        self.template_encoding = options.get("template_encoding", "ascii")

    def __call__(self, content):
        """Split file content into Python script and template.

        content: str

        returns (unicode, unicode): Python script and template

        If content holds no complete <?xml ... ?> declaration, the script is
        empty and all of content is the template.

        The template encoding is taken from a coding hint inside the XML
        declaration; the script encoding from a coding hint in a comment on
        the first line of the stripped script, which is then dropped from the
        script. Without such a hint, the encodings configured on the instance
        are used.

        Decoding may raise UnicodeDecodeError (a ValueError) or, for an
        unknown encoding name, LookupError.
        """
        pieces = XML_DECLARATION.split(content, 1)
        if len(pieces) > 1:
            script, declaration, xml_options, template = pieces
            # The template begins right behind the declaration: count the
            # lines before the one the declaration ends on, and measure how
            # far into that line the declaration reaches.
            head_lines = (script + declaration).splitlines()
            offset = (len(head_lines) - 1, len(head_lines[-1]))
        else:
            # No declaration found, everything is template.
            script = xml_options = ""
            template = content
            offset = (0, 0)

        # XXX Bad hack: the (line, row) offset is stashed on the instance
        # rather than returned, to keep the splitter API backwards compatible
        # during the 0.3 series.
        self._last_template_offset = offset

        encoding = self.template_encoding
        found = CODING_PATTERN.search(xml_options)
        if found:
            encoding = found.group(1)
        template = template.decode(encoding)

        encoding = self.script_encoding
        script = script.strip()
        if script.startswith("#"):
            script_lines = script.splitlines(True)
            found = CODING_PATTERN.search(script_lines[0])
            if found:
                # The coding comment has served its purpose; drop it.
                encoding = found.group(1)
                script = "".join(script_lines[1:])

        return script.decode(encoding), template