Commits

Kirill Simonov committed e58ffbe

Add an example: yaml highlight script.

Usage:
{{{
$ cat your_yaml_document | ./yaml_hl.py # Produces colored ASCII output
$ cat your_yaml_document | ./yaml_hl.py -c yaml_hl_html.cfg # Produces HTML output.
}}}

  • Participants
  • Parent commits a4348b1

Comments (0)

Files changed (5)

examples/yaml-hl/yaml_hl.py

+#!/usr/bin/python
+
+import yaml, codecs, sys, optparse
+
+class YAMLHighlight:
+
+    def __init__(self, config):
+        parameters = yaml.load_document(config)
+        self.replaces = parameters['replaces']
+        self.substitutions = {}
+        for domain, items in [('Token', parameters['tokens']),
+                    ('Event', parameters['events'])]:
+            for code in items:
+                name = ''.join([part.capitalize() for part in code.split('-')]+[domain])
+                cls = getattr(yaml, name)
+                value = items[code]
+                if value:
+                    if 'start' in value:
+                        self.substitutions[cls, -1] = value['start']
+                    if 'end' in value:
+                        self.substitutions[cls, +1] = value['end']
+
+    def highlight(self, input):
+        if isinstance(input, str):
+            if input.startswith(codecs.BOM_UTF16_LE):
+                input = unicode(input, 'utf-16-le')
+            elif input.startswith(codecs.BOM_UTF16_BE):
+                input = unicode(input, 'utf-16-be')
+            else:
+                input = unicode(input, 'utf-8')
+        tokens = yaml.parse(input, Parser=iter)
+        events = yaml.parse(input)
+        markers = []
+        number = 0
+        for token in tokens:
+            number += 1
+            if token.start_mark.index != token.end_mark.index:
+                cls = token.__class__
+                if (cls, -1) in self.substitutions:
+                    markers.append([token.start_mark.index, +2, number, self.substitutions[cls, -1]])
+                if (cls, +1) in self.substitutions:
+                    markers.append([token.end_mark.index, -2, number, self.substitutions[cls, +1]])
+        number = 0
+        for event in events:
+            number += 1
+            cls = event.__class__
+            if (cls, -1) in self.substitutions:
+                markers.append([event.start_mark.index, +1, number, self.substitutions[cls, -1]])
+            if (cls, +1) in self.substitutions:
+                markers.append([event.end_mark.index, -1, number, self.substitutions[cls, +1]])
+        markers.sort()
+        markers.reverse()
+        chunks = []
+        position = len(input)
+        for index, weight1, weight2, substitution in markers:
+            if index < position:
+                chunk = input[index:position]
+                for substring, replacement in self.replaces:
+                    chunk = chunk.replace(substring, replacement)
+                chunks.append(chunk)
+                position = index
+            chunks.append(substitution)
+        chunks.reverse()
+        result = u''.join(chunks)
+        return result.encode('utf-8')
+
+if __name__ == '__main__':
+    parser = optparse.OptionParser()
+    parser.add_option('-c', '--config', dest='config', default='yaml_hl_ascii.cfg', metavar='CONFIG')
+    (options, args) = parser.parse_args()
+    hl = YAMLHighlight(file(options.config))
+    sys.stdout.write(hl.highlight(sys.stdin.read()))
+

examples/yaml-hl/yaml_hl_ascii.cfg

+%YAML 1.1
+---
+# Configuration for yaml_hl.py producing coloured terminal output.
+# Each entry under "tokens" and "events" may give a "start" string, inserted
+# before the item, and an "end" string, inserted after it.  Entries left
+# empty insert nothing.
+# NOTE(review): the "\e[...m" values look like ANSI colour escape sequences;
+# confirm the exact colours against the target terminal.
+tokens:
+    stream-start:
+    stream-end:
+    directive:              { start: "\e[35m", end: "\e[0;1;30;40m" }
+    document-start:         { start: "\e[35m", end: "\e[0;1;30;40m" }
+    document-end:           { start: "\e[35m", end: "\e[0;1;30;40m" }
+    block-sequence-start:
+    block-mapping-start:
+    block-end:
+    flow-sequence-start:    { start: "\e[33m", end: "\e[0;1;30;40m" }
+    flow-mapping-start:     { start: "\e[33m", end: "\e[0;1;30;40m" }
+    flow-sequence-end:      { start: "\e[33m", end: "\e[0;1;30;40m" }
+    flow-mapping-end:       { start: "\e[33m", end: "\e[0;1;30;40m" }
+    key:                    { start: "\e[33m", end: "\e[0;1;30;40m" }
+    value:                  { start: "\e[33m", end: "\e[0;1;30;40m" }
+    block-entry:            { start: "\e[33m", end: "\e[0;1;30;40m" }
+    flow-entry:             { start: "\e[33m", end: "\e[0;1;30;40m" }
+    alias:                  { start: "\e[32m", end: "\e[0;1;30;40m" }
+    anchor:                 { start: "\e[32m", end: "\e[0;1;30;40m" }
+    tag:                    { start: "\e[32m", end: "\e[0;1;30;40m" }
+    scalar:                 { start: "\e[36m", end: "\e[0;1;30;40m" }
+
+# Only the stream boundaries insert anything at the event level: one string
+# at the start of the stream and one at its end.
+events:
+    stream-start:   { start: "\e[0;1;30;40m" }
+    stream-end:     { end: "\e[0m" }
+    document-start:
+    document-end:
+    sequence:
+    mapping:
+    collection-end:
+    scalar:
+
+# Ordered (substring, replacement) pairs applied to the YAML text copied
+# between the inserted strings.  They are applied one after another, so
+# "\r\n" is listed before "\r".
+replaces: !!pairs
+    - "\r\n":   "\n"
+    - "\r":     "\n"
+    - "\n":     "\n"
+    - "\x85":   "\n"
+    - "\u2028": "\n"
+    - "\u2029": "\n"
+
+# vim: ft=yaml

examples/yaml-hl/yaml_hl_html.cfg

+
+# Configuration for yaml_hl.py producing HTML output.
+# Each entry under "tokens" and "events" may give a "start" string, inserted
+# before the item, and an "end" string, inserted after it.  Entries left
+# empty insert nothing.
+# Tokens are wrapped in <code> elements whose class names the token kind.
+tokens:
+    stream-start:
+    stream-end:
+    directive:              { start: <code class="directive-token">, end: </code> }
+    document-start:         { start: <code class="document-start-token">, end: </code> }
+    document-end:           { start: <code class="document-end-token">, end: </code> }
+    block-sequence-start:
+    block-mapping-start:
+    block-end:
+    flow-sequence-start:    { start: <code class="delimiter-token">, end: </code> }
+    flow-mapping-start:     { start: <code class="delimiter-token">, end: </code> }
+    flow-sequence-end:      { start: <code class="delimiter-token">, end: </code> }
+    flow-mapping-end:       { start: <code class="delimiter-token">, end: </code> }
+    key:                    { start: <code class="delimiter-token">, end: </code> }
+    value:                  { start: <code class="delimiter-token">, end: </code> }
+    block-entry:            { start: <code class="delimiter-token">, end: </code> }
+    flow-entry:             { start: <code class="delimiter-token">, end: </code> }
+    alias:                  { start: <code class="anchor-token">, end: </code> }
+    anchor:                 { start: <code class="anchor-token">, end: </code> }
+    tag:                    { start: <code class="tag-token">, end: </code> }
+    scalar:                 { start: <code class="scalar-token">, end: </code> }
+
+# The whole stream goes inside one <pre> element; documents, collections and
+# scalars become <span> elements.  "sequence" and "mapping" only open a span;
+# presumably the "collection-end" entry supplies the matching </span>.
+events:
+    stream-start:   { start: <pre class="yaml-stream"> }
+    stream-end:     { end: </pre> }
+    document-start: { start: <span class="document"> }
+    document-end:   { end: </span> }
+    sequence:       { start: <span class="sequence"> }
+    mapping:        { start: <span class="mapping"> }
+    collection-end: { end: </span> }
+    scalar:         { start: <span class="scalar">, end: </span> }
+
+replaces: !!pairs
+    # Ordered (substring, replacement) pairs applied one after another to the
+    # YAML text copied into the output.  "&" has to be escaped before "<" and
+    # ">": otherwise the "&" of a freshly produced "&lt;" or "&gt;" would be
+    # escaped a second time, giving "&amp;lt;".
+    - "\r\n":   "\n"
+    - "\r":     "\n"
+    - "\n":     "\n"
+    - "\x85":   "\n"
+    - "\u2028": "\n"
+    - "\u2029": "\n"
+    - "&":      "&amp;"
+    - "<":      "&lt;"
+    - ">":      "&gt;"
+
+# vim: ft=yaml

lib/yaml/error.py

 
 class Mark:
 
-    def __init__(self, name, line, column, buffer, pointer):
+    def __init__(self, name, index, line, column, buffer, pointer):
         self.name = name
+        self.index = index
         self.line = line
         self.column = column
         self.buffer = buffer

lib/yaml/reader.py

 
     def get_mark(self):
         if self.stream is None:
-            return Mark(self.name, self.line, self.column,
+            return Mark(self.name, self.index, self.line, self.column,
                     self.buffer, self.pointer)
         else:
-            return Mark(self.name, self.line, self.column, None, None)
+            return Mark(self.name, self.index, self.line, self.column,
+                    None, None)
 
     def determine_encoding(self):
         while not self.eof and len(self.raw_buffer) < 2: