1. Georg Brandl
  2. pygments-main
  3. Issues
Issue #657 new

new formatter request: wx.html (html3 compatible, with patch)

Stephen_Simmons
created an issue

When trying to integrate Pygments with wx.html.HtmlEasyPrinting, I discovered that Pygments' HTML formatter produces HTML that is too complicated for the simple HTML parser in wx.html.

Here is an HTML3 formatter suitable for use with wx.html. Hopefully one of Pygments' core developers can quickly add it to the project with only minor changes.

{{{

!python

import pygments.formatter, pygments.token import pygments.lexers, pygments.formatters, pygments.styles

class WXHtmlFormatter(pygments.formatter.Formatter):
    """Format in very simple HTML that suits wx.html.

    The output uses only ``<FONT COLOR=...>``, ``<B>``, ``<I>``, ``<U>``,
    ``<TT>`` and ``<BR>`` tags; background colours and borders from the
    style are ignored.  Every output line is prefixed with a grey line
    number column; the number itself is printed on line 1 and on every
    ``_LINE_NUM_FREQ``-th line.
    """

    name = 'wxHTML'
    aliases = ['wxhtml', 'html3']
    filenames = []

    unicodeoutput = False

    # Width (in characters) of the left-aligned line-number column, and how
    # often a number is actually printed in it.
    _LINE_NUM_WIDTH = 5
    _LINE_NUM_FREQ = 5

    def __init__(self, **options):
        """Initialise the base formatter with ``options``, then force ASCII.

        NOTE(review): this overrides any ``encoding`` option the caller
        passed.  Presumably it makes pygments.format() choose a byte buffer;
        non-ASCII source text may then fail to encode -- confirm.
        """
        pygments.formatter.Formatter.__init__(self, **options)
        self.encoding = 'ascii'  # let pygments.format() do the right thing

    @staticmethod
    def _escape_html(text):
        """Escape ``&``, ``<``, ``>``, both quote characters and spaces.

        Spaces become ``&nbsp;``, presumably so that indentation survives
        HTML whitespace collapsing.  ``&`` is replaced first so the entities
        produced by the later replacements are not escaped a second time.
        """
        text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
        return text.replace('"', '&quot;').replace("'", '&#39;').replace(' ', '&nbsp;')

    @staticmethod
    def _lookup_style(style_dict, ttype):
        """Return the style entry for ``ttype`` or its nearest styled ancestor.

        Token types that the style does not know are resolved by walking up
        ``ttype.parent``.  Returns ``None`` when no ancestor has an entry, in
        which case the caller emits the text unstyled instead of dropping it.
        """
        while ttype is not None and ttype not in style_dict:
            ttype = getattr(ttype, 'parent', None)
        if ttype is None:
            return None
        return style_dict[ttype]

    @staticmethod
    def _style_token(style, text):
        """Wrap already-escaped ``text`` in the HTML3 tags ``style`` asks for.

        Tags nest as ``<U><I><B><FONT>text</FONT></B></I></U>``.
        """
        if style is None:
            return text
        if style['color']:
            text = u'<FONT COLOR="#%s">%s</FONT>' % (style['color'], text)
        if style['bold']:
            text = u'<B>%s</B>' % text
        if style['italic']:
            text = u'<I>%s</I>' % text
        if style['underline']:
            text = u'<U>%s</U>' % text
        # style['bgcolor'] and style['border'] are currently ignored.
        return text

    def _render_line(self, line_num, bits):
        """Return one output line: grey number column, ``bits``, ``<BR>``.

        ``line_num`` is 1-based.  Unnumbered lines get a blank column of the
        same width so the code stays aligned.
        """
        if line_num == 1 or line_num % self._LINE_NUM_FREQ == 0:
            prefix = '%-*d ' % (self._LINE_NUM_WIDTH, line_num)
        else:
            prefix = ' ' * (self._LINE_NUM_WIDTH + 1)
        prefix = prefix.replace(' ', '&nbsp;')
        prefix = '<FONT COLOR="#A0A0A0">%s</FONT>' % prefix
        return '%s%s<BR>' % (prefix, ''.join(bits))

    def format(self, tokensource, outfile):
        """Write ``tokensource`` to ``outfile`` as one simple HTML document.

        ``tokensource`` is an iterable of ``(ttype, value)`` pairs.  Each
        value is escaped, split on newlines and styled piece by piece, so a
        token spanning several lines never leaves a tag open across a
        ``<BR>``.  ``outfile`` is written to once and then flushed.
        """
        style_dict = dict(self.style.list_styles())

        lines = []
        bits = []  # styled fragments of the line currently being built
        for ttype, value in tokensource:
            style = self._lookup_style(style_dict, ttype)
            parts = self._escape_html(value).split('\n')
            # Every piece except the last is terminated by a newline.
            for part in parts[:-1]:
                if part:
                    bits.append(self._style_token(style, part))
                lines.append(self._render_line(len(lines) + 1, bits))
                bits = []
            # Skip empty pieces so a trailing newline neither emits empty
            # tags nor starts a spurious extra line.
            if parts[-1]:
                bits.append(self._style_token(style, parts[-1]))
        if bits:
            # The input did not end with a newline: flush the partial line.
            lines.append(self._render_line(len(lines) + 1, bits))

        wrapper = '<HTML><HEAD></HEAD><BODY><TT>%s</TT></BODY></HTML>'
        outfile.write(wrapper % '\n'.join(lines))
        outfile.flush()

}}}