1. Maciej Fijalkowski
  2. hack2

Source

hack2 / codeviewer / xdemo.py

Diff from to

File codeviewer/xdemo.py

  • Ignore whitespace
 from pygments import highlight
 from pygments.lexers import PythonLexer
-from pygments.formatters import HtmlFormatter
+from pygments.formatters.html import HtmlFormatter, _escape_html_table
 from jinja2 import Template
 import json
 
     def __init__(self, codelines):
+        # Keep the source lines handed in by the caller.
         self.codelines = codelines
         HtmlFormatter.__init__(self)
-    
+        # One list of (is_text, length) segments per output line; the last
+        # entry is the line currently being assembled.
+        self.mapping = [[]]
+
+    def _consider_html(self, p):
+        """Record ``p`` as markup in the current line's mapping and return it.
+
+        A non-empty ``p`` adds a ``(False, len(p))`` segment to the last
+        entry of ``self.mapping``; an empty ``p`` records nothing.
+        """
+        markup_length = len(p)
+        if markup_length > 0:
+            current_line = self.mapping[-1]
+            current_line.append((False, markup_length))
+        return p
+
+    def _consider_txt(self, p):
+        """Record the HTML-escaped token text ``p`` in the current line's
+        mapping and return it unchanged.
+
+        Plain runs are recorded as ``(True, run_length)``.  Every character
+        entity (``&...;``) stands for exactly one source character, so it is
+        recorded as ``(False, len(entity) - 1)`` followed by ``(True, 1)``:
+        the source character is attributed to the entity's closing ``;``.
+
+        Unlike a single "ends with an entity" check, this handles any number
+        of entities anywhere in ``p`` (e.g. ``&quot;a &lt; b&quot;``).
+        An empty ``p`` records nothing.
+        """
+        segments = self.mapping[-1]
+        pos = 0
+        total = len(p)
+        while pos < total:
+            amp = p.find("&", pos)
+            if amp == -1:
+                # no more entities: the remainder is plain text
+                segments.append((True, total - pos))
+                break
+            if amp > pos:
+                # plain text in front of the entity
+                segments.append((True, amp - pos))
+            end = p.find(";", amp)
+            # escaped text never contains a bare "&"
+            assert end != -1, p
+            segments.append((False, end - amp))
+            segments.append((True, 1))
+            pos = end + 1
+        return p
+
+    def _reset_line(self):
+        """Start an empty segment list for the next output line."""
+        fresh_line = []
+        self.mapping.append(fresh_line)
+
+    def _format_lines(self, tokensource):
+        """
+        Just format the tokens, without any wrapping tags.
+        Yield individual lines as ``(1, html_line)`` pairs.
+
+        While the HTML is assembled, markup is passed through
+        ``_consider_html`` and escaped token text through ``_consider_txt``,
+        so ``self.mapping`` records how each emitted line is composed.
+        ``_reset_line`` is called after every yielded line.
+
+        XXX a hack, copied from pygments directly
+        """
+        nocls = self.noclasses
+        lsep = self.lineseparator
+        # for <span style=""> lookup only
+        getcls = self.ttype2class.get
+        c2s = self.class2style
+        escape_table = _escape_html_table
+
+        lspan = ''
+        line = ''
+        for ttype, value in tokensource:
+            if nocls:
+                cclass = getcls(ttype)
+                # walk up the token hierarchy until a known class is found
+                while cclass is None:
+                    ttype = ttype.parent
+                    cclass = getcls(ttype)
+                cspan = cclass and '<span style="%s">' % c2s[cclass][0] or ''
+            else:
+                cls = self._get_css_class(ttype)
+                cspan = cls and '<span class="%s">' % cls or ''
+
+            parts = value.translate(escape_table).split('\n')
+
+            # for all but the last line
+            for part in parts[:-1]:
+                if line:
+                    if lspan != cspan:
+                        before = (lspan and '</span>') + cspan
+                        after = (cspan and '</span>') + lsep
+                        self._consider_html(before)
+                        self._consider_txt(part)
+                        self._consider_html(after)
+                        line += before + part + after
+                    else:  # both are the same
+                        self._consider_txt(part)
+                        after = (lspan and '</span>') + lsep
+                        self._consider_html(after)
+                        line += part + after
+                    yield 1, line
+                    self._reset_line()
+                    line = ''
+                elif part:
+                    # operands are evaluated left to right, so the mapping
+                    # is recorded in output order
+                    yield 1, (self._consider_html(cspan) +
+                              self._consider_txt(part) +
+                              self._consider_html(cspan and '</span>') + lsep)
+                    self._reset_line()
+                else:
+                    yield 1, self._consider_html(lsep)
+                    self._reset_line()
+            # for the last line
+            if line and parts[-1]:
+                if lspan != cspan:
+                    line += (self._consider_html((lspan and '</span>') +
+                                                cspan) +
+                             self._consider_txt(parts[-1]))
+                    lspan = cspan
+                else:
+                    line += self._consider_txt(parts[-1])
+            elif parts[-1]:
+                line = (self._consider_html(cspan) +
+                        self._consider_txt(parts[-1]))
+                lspan = cspan
+            # else we neither have to open a new span nor set lspan
+
+        if line:
+            # flush a final line that was not terminated by a newline
+            yield 1, line + (lspan and '</span>') + lsep
+            self._reset_line()
+
     def wrap(self, source, outfile):
         total = 1
         for i, t in HtmlFormatter.wrap(self, source, outfile):
             else:
                 yield i, t
 
+
 FNAME = '/home/fijal/src/pypy/pypy/translator/goal/richards.py'
 code = open(FNAME, 'r').read()
 with open('pygments.css', 'w') as f:
 with open('main.html', 'w') as f:
     f.write(Template(open("file.jinja").read()).render())
 with open("x.html", "w") as f:
-    source = highlight(code, PythonLexer(), CustomHtmlFormatter(code.splitlines()))
+    # Keep the formatter in a variable: its ``mapping`` is read further down.
+    cf = CustomHtmlFormatter(code.splitlines())
+    source = highlight(code, PythonLexer(), cf)
+    # Lines of the highlighted output, used only in the assertion message below.
+    sourcelines = source.splitlines()
     f.write(source)
 with open("x.json", "w") as f:
-    json.dump(code.splitlines(), f)
+    codelines = code.splitlines()
+    # cf.mapping is expected to hold one more entry than there are source lines.
+    assert len(codelines) == len(cf.mapping) - 1
+    # Per line, the lengths of the text-flagged segments must add up to the
+    # length of the source line.
+    for ii, (cl, map) in enumerate(zip(codelines, cf.mapping)):
+        assert sum([i for (k, i) in map if k]) == len(cl), (map, cl, sourcelines[ii])
+    # we need to create a mapping for each character
+    line_mapping = []
+    for line, map in zip(codelines, cf.mapping):
+        new_line = []
+        # running offset over this line's recorded segments
+        counter = 0
+        # offset just past the most recent text character
+        last_good_counter = 0
+        for flag, lgt in map:
+            if not flag:
+                # non-text segment: advance the offset without recording anything
+                counter += lgt
+            else:
+                # text segment: record one offset per character
+                for i in range(lgt):
+                    new_line.append(counter)
+                    counter += 1
+                    last_good_counter = counter
+        # one extra entry: the offset right after the last text character
+        new_line.append(last_good_counter)
+        line_mapping.append(new_line)
+    json.dump({'codelines': codelines,
+               'line_mapping': line_mapping}, f)