# syntaxhighlighter

# -*- coding: utf-8 -*-
"""Partition-based syntax highlighter."""
import re

from PyQt4.QtCore import QVariant
from PyQt4.QtGui import (
    QBrush, QColor, QFont, QSyntaxHighlighter, QTextCharFormat, QTextFormat)

class Format(object):
    """A named text style backed by a ``QTextCharFormat``.

    The resulting character format is available as ``tcf``; the format name
    is additionally stored inside it under the custom property ``NAME``.
    """

    # NAME is a class-level constant and therefore must not be listed here:
    # a slot with the same name as a class attribute is wasted on Python 2
    # and raises ValueError on Python 3.
    __slots__ = ("name", "tcf")

    # Custom QTextFormat property id used to tag a char format with its name.
    NAME = QTextFormat.UserProperty + 1

    def __init__(self, name, color=None, bold=None, italic=None, base_format=None):
        """
        :param name: name of the partition or token this format applies to
        :param color: foreground color; a QColor or anything QColor() accepts
        :param bold: True/False to force the font weight, None to leave it
        :param italic: True/False to force italics, None to leave it
        :param base_format: Format or QTextCharFormat to inherit settings from
        """
        self.name = name
        tcf = QTextCharFormat()
        if base_format is not None:
            if isinstance(base_format, Format):
                base_format = base_format.tcf
            # start from the base settings; explicit arguments override below
            tcf.merge(base_format)
        if color is not None:
            if not isinstance(color, QColor):
                color = QColor(color)
            tcf.setForeground(QBrush(color))
        if bold is not None:
            tcf.setFontWeight(QFont.Bold if bold else QFont.Normal)
        if italic is not None:
            tcf.setFontItalic(bool(italic))
        tcf.setProperty(Format.NAME, QVariant(name))
        self.tcf = tcf

class Partition(object):
    """A delimited region of the source (e.g. a string or a comment).

    ``start`` and ``end`` are regular expression sources for the opening and
    closing delimiters. ``search_end`` is the bound ``search`` method of the
    compiled ``end`` pattern.
    """
    # every partition maps to a specific state in QSyntaxHighlighter
    __slots__ = ("name", "start", "end", "is_multiline", "search_end")

    def __init__(self, name, start, end, is_multiline=False):
        """
        :param name: partition name
        :param start: regex source matching the start of the partition
        :param end: regex source matching the end of the partition
        :param is_multiline: whether the partition may continue on the next line
        """
        self.name = name
        self.start = start
        self.end = end
        self.is_multiline = is_multiline
        self.search_end = re.compile(end, re.M | re.S).search

class PartitionScanner(object):
    """Split text into partitions (contexts) such as strings or comments.

    The state of the scanner is the index of the partition that is currently
    open, or -1 when no partition is open.
    """
    # The idea to partition the source into different contexts comes from Eclipse.

    def __init__(self, partitions):
        """
        :param partitions:
            sequence of partition definitions. Each item is either a
            tuple/list of positional arguments for Partition, a dict of
            keyword arguments for Partition, or an object that already
            provides ``name``, ``start``, ``is_multiline`` and ``search_end``.
        """
        start_groups = []
        self.partitions = []
        for i, p in enumerate(partitions):
            if isinstance(p, (tuple, list)):
                p = Partition(*p)
            elif isinstance(p, dict):
                p = Partition(**p)
            self.partitions.append(p)
            # The group name encodes only the partition index, so partition
            # names do not have to be valid regex group names.
            start_groups.append("(?P<g%d>%s)" % (i, p.start))
        # "(?!)" never matches; an empty pattern would match everywhere.
        start_pat = "|".join(start_groups) or "(?!)"
        self.search_start = re.compile(start_pat, re.M | re.S).search

    def scan(self, current_state, text):
        """Yield ``(start, end, partition, new_state, is_inside)`` tuples.

        :param current_state: index of the partition open at the start of
            ``text`` or -1; values outside the known range are treated as -1
        :param text: the text to scan

        ``partition`` is the partition name (None outside of any partition).
        ``is_inside`` is False for the spans matched by the start/end
        delimiters and for the final zero-length tuple, whose ``new_state``
        is the state to carry over to the following text.
        """
        last_pos = 0
        length = len(text)
        parts = self.partitions
        search_start = self.search_start
        if not 0 <= current_state < len(parts):
            current_state = -1
        # loop yields (start, end, partition, new_state, is_inside)
        while last_pos < length:
            if current_state == -1:
                found = search_start(text, last_pos)
                if found:
                    start, end = found.span()
                    yield last_pos, start, None, -1, True
                    # The outer "g<index>" group closes last, so lastgroup
                    # identifies the partition even if its start pattern
                    # contains capture groups of its own.
                    current_state = int(found.lastgroup[1:])
                    p = parts[current_state]
                    yield start, end, p.name, current_state, False
                    last_pos = end
                else:
                    # no further partition starts: the rest is default text
                    yield last_pos, length, None, -1, True
                    break
            else:
                p = parts[current_state]
                found = p.search_end(text, last_pos)
                if found:
                    start, end = found.span()
                    yield last_pos, start, p.name, current_state, True
                    yield start, end, p.name, current_state, False
                    last_pos = end
                    current_state = -1
                else:
                    # partition is not closed within this text
                    yield last_pos, length, p.name, current_state, True
                    break
        if current_state != -1:
            p = parts[current_state]
            if not p.is_multiline:
                # single-line partitions implicitly end with the text
                current_state = -1
        yield length, length, None, current_state, False

class Token(object):
    """Definition of a token: a name plus the regex that recognizes it.

    ``prefix`` and ``suffix`` are regex fragments placed around the pattern
    by the scanner that uses the token.
    """
    __slots__ = ("name", "pattern", "prefix", "suffix")

    def __init__(self, name, pattern, prefix="", suffix=""):
        """
        :param name: token name
        :param pattern: regex source, or a list/tuple of alternatives that
            are joined with ``|``
        :param prefix: regex fragment required before the pattern
        :param suffix: regex fragment required after the pattern
        """
        self.name = name
        if isinstance(pattern, (list, tuple)):
            pattern = "|".join(pattern)
        self.pattern = pattern
        self.prefix = prefix
        self.suffix = suffix


class Scanner(object):
    """Find named tokens in a piece of text with one combined regex."""
    __slots__ = ("tokens", "search")

    def __init__(self, tokens):
        """
        :param tokens:
            sequence of token definitions. Each item is either a tuple/list
            of positional arguments for Token, a dict of keyword arguments
            for Token, or an object providing ``name``, ``pattern``,
            ``prefix`` and ``suffix``.
        :raises TypeError: if an item is none of the above
        """
        self.tokens = []
        groups = []
        for t in tokens:
            if isinstance(t, (list, tuple)):
                t = Token(*t)
            elif isinstance(t, dict):
                t = Token(**t)
            elif not all(hasattr(t, attr)
                         for attr in ("name", "pattern", "prefix", "suffix")):
                raise TypeError("Token expected, got %r" % (t,))
            gdef = "?P<%s>" % t.name
            if gdef in t.pattern:
                # the pattern already defines the named group itself
                p = t.pattern
            else:
                p = "(%s%s)" % (gdef, t.pattern)
            p = t.prefix + p + t.suffix
            # Wrap each alternative so that a "|" inside a prefix, suffix or
            # self-grouped pattern cannot leak into the combined alternation.
            groups.append("(?:%s)" % p)
            self.tokens.append(t)
        # "(?!)" never matches; an empty pattern would match everywhere.
        pat = "|".join(groups) or "(?!)"
        self.search = re.compile(pat).search

    def scan(self, s):
        """Yield ``(token_name, start_pos, end_pos)`` for each token in ``s``.

        Positions refer to the span of the token's named group, so text
        matched only by a prefix or suffix is not part of the token.
        Zero-width matches are skipped.
        """
        search = self.search
        length = len(s)
        pos = 0
        # loop yields (token, start_pos, end_pos)
        while pos <= length:
            found = search(s, pos)
            if not found:
                break
            name = found.lastgroup
            if name is None:
                # the last closed group is unnamed: nothing to report
                start = end = found.end()
            else:
                start, end = found.span(name)
                if end > start:
                    yield name, start, end
            # always move forward, otherwise an empty match would repeat forever
            pos = end if end > pos else pos + 1

class SyntaxHighlighter(QSyntaxHighlighter):
    """QSyntaxHighlighter driven by a partition scanner and token scanners."""

    def __init__(self, parent, partition_scanner, scanner, formats, default_font=None):
        """
        :param parent: QDocument or QTextEdit/QPlainTextEdit instance
        :param partition_scanner:
            PartitionScanner instance
        :param scanner:
            dictionary of token scanners for each partition
            The key is the name of the partition, the value is a Scanner instance
            The default scanner has the key None
        :param formats:
            list of tuples consisting of a name and a format definition
            The name is the name of a partition or token
            The definition is a color string, a tuple/list of positional
            arguments for Format (after the name), a dict of keyword
            arguments for Format, or a Format instance
        :param default_font: accepted but not used by this class
        :raises TypeError: if a format definition is none of the above
        """
        QSyntaxHighlighter.__init__(self, parent)
        self.partition_scanner = partition_scanner
        self.scanner = scanner

        # byte and unicode strings on Python 2, plain str on Python 3
        string_types = (str, type(u""))
        self.formats = {}
        for fname, f in formats:
            if isinstance(f, string_types):
                f = (f,)  # only color specified
            if isinstance(f, (tuple, list)):
                # tuple() so that list definitions can be concatenated too
                f = Format(*((fname,) + tuple(f)))
            elif isinstance(f, dict):
                f = Format(**dict(name=fname, **f))
            elif not isinstance(f, Format):
                raise TypeError("Format expected, %r found" % (f,))
            self.formats[f.name] = f.tcf

        scan_inside = {}
        for inside_part, inside_scanner in self.scanner.items():
            scan_inside[inside_part] = inside_scanner.scan
        # reduce name look-ups for better speed
        self.get_scanner = scan_inside.get
        self.scan_partitions = partition_scanner.scan
        self.get_format = self.formats.get

    def highlightBlock(self, text):
        "automatically called by Qt"
        # unicode() on Python 2, str() on Python 3
        text = type(u"")(text) + "\n"
        previous_state = self.previousBlockState()
        new_state = previous_state
        # speed-up name-lookups
        get_format = self.get_format
        set_format = self.setFormat
        get_scanner = self.get_scanner
        for start, end, partition, new_state, is_inside in self.scan_partitions(previous_state, text):
            # format of the partition itself (delimiters and content)
            f = get_format(partition, None)
            if f:
                set_format(start, end-start, f)
            if is_inside:
                # tokens inside the partition are formatted on top of it
                scan = get_scanner(partition)
                if scan:
                    for token, token_pos, token_end in scan(text[start:end]):
                        f = get_format(token)
                        if f:
                            set_format(start+token_pos, token_end-token_pos, f)
        # Remember the open partition so that the next block continues in it;
        # without this previousBlockState() never changes.
        self.setCurrentBlockState(new_state)


def load_syntax(syntax, context=None):
    """Build scanners and format definitions from a syntax description.

    :param syntax:
        dictionary with the optional keys "partitions" (list of partition
        definitions), "scanner" (dict mapping a partition name to a list of
        token definitions) and "formats" (dict mapping a name to a format
        definition)
    :param context:
        dictionary used to fill in string format definitions. A string of
        the exact form "%(key)s" is replaced by ``context[key]`` as is (so
        the value may be a non-string definition); any other string is
        interpolated with ``% context``.
    :returns: tuple ``(partition_scanner, scanners, formats)`` where
        ``formats`` is a list of ``(name, definition)`` tuples
    :raises KeyError: if a referenced context key is missing
    """
    context = context or {}

    partition_scanner = PartitionScanner(syntax.get("partitions", []))

    scanners = {}
    for part_name, part_scanner in syntax.get("scanner", {}).items():
        scanners[part_name] = Scanner(part_scanner)

    # byte and unicode strings on Python 2, plain str on Python 3
    string_types = (str, type(u""))
    formats = []
    for fname, fstyle in syntax.get("formats", {}).items():
        if isinstance(fstyle, string_types):
            if fstyle.startswith("%(") and fstyle.endswith(")s"):
                # whole-value reference: take the context object unchanged
                key = fstyle[2:-2]
                fstyle = context[key]
            else:
                fstyle = fstyle % context
        formats.append((fname, fstyle))

    return partition_scanner, scanners, formats