Source

hgbook / ja / fixhtml.py

#!/usr/bin/env python
#
# This script attempts to work around some of the more bizarre and
# quirky behaviours of htlatex.
#
# - We've persuaded htlatex to produce UTF-8, which unfortunately
#   causes it to use huge character sequences to represent even the
#   safe 7-bit ASCII subset of UTF-8.  We fix that up.
#
# - BUT we have to treat angle brackets (for example, redirections in
#   shell script snippets) specially, otherwise they'll break the
#   generated HTML.  (Reported by Johannes Hoff.)
#
# - For some reason, htlatex gives a unique ID to each fancyvrb
#   environment, which makes writing a sane, small CSS stylesheet
#   impossible.  We squish all those IDs down to nothing.

import io
import os
import re
import sys

# Entities for '<' (U+003C) and '>' (U+003E).  Matches are lower-cased
# by hide_angle so that unicode_re, which only accepts upper-case hex
# digits, leaves them encoded.
angle_re = re.compile(r'(&#x003[CE];)')

# Any other 7-bit ASCII character written as a four-digit hex entity;
# group 1 is the two significant hex digits.  The ampersand (0x26) is
# excluded by the lookahead: decoding it would put a bare '&' into the
# HTML and could turn text such as "&#x0026;lt;" into the live entity
# "&lt;".  It cannot be hidden by lower-casing like the angle brackets
# because "26" contains no hex letter.
unicode_re = re.compile(r'&#x00(?!26)([0-7][0-9A-F]);')

# Numbered fancyvrb ids (matched case-insensitively).
fancyvrb_re = re.compile(r'id="fancyvrb\d+"', re.I)

# Entities U+FB00..U+FB04; group 1 is the last hex digit.
ligature_re = re.compile(r'&#xFB0([0-4]);')

# Suffix for the temporary output file; the process id keeps
# concurrently running instances from using the same temporary name.
tmpsuffix = '.tmp.' + str(os.getpid())

def hide_angle(m):
    """Return the entity captured in group 1 of match *m*, lower-cased.

    Used as a substitution callback for angle_re.  unicode_re only
    accepts upper-case hex digits, so the lower-cased entity is left
    alone by the later ASCII fix-up pass.
    """
    entity = m.group(1)
    return entity.lower()

def fix_ascii(m):
    """Return the character whose hex code is group 1 of match *m*.

    Used as a substitution callback for unicode_re.
    """
    code_point = int(m.group(1), 16)
    return chr(code_point)

# Plain-letter spellings of the ligature entities, indexed by the last
# hex digit that ligature_re captures.
ligatures = [
    'ff',   # &#xFB00;
    'fi',   # &#xFB01;
    'fl',   # &#xFB02;
    'ffi',  # &#xFB03;
    'ffl',  # &#xFB04;
]

def expand_ligature(m):
    """Return the plain-letter spelling for the ligature in match *m*.

    Used as a substitution callback for ligature_re; group 1 is the
    index into the ligatures table.
    """
    index = int(m.group(1))
    return ligatures[index]

# Rewrite each file named on the command line in place.  Output goes
# to a temporary sibling file first and is renamed over the original
# only after every line has been processed, so a failure part-way
# through never leaves a half-written file under the original name.
for name in sys.argv[1:]:
    tmpname = name + tmpsuffix
    try:
        # io.open() is available on Python 2.6+ and Python 3, whereas
        # the file() builtin exists only on Python 2.  The input is
        # UTF-8 (see the header comment), so say so explicitly rather
        # than relying on the platform default.  newline='' turns off
        # newline translation so line endings pass through untouched.
        with io.open(name, encoding='utf-8', newline='') as ifp:
            with io.open(tmpname, 'w', encoding='utf-8',
                         newline='') as ofp:
                for line in ifp:
                    # Order matters: the angle-bracket entities must
                    # be lower-cased before unicode_re runs, because
                    # unicode_re only matches upper-case hex digits.
                    line = angle_re.sub(hide_angle, line)
                    line = unicode_re.sub(fix_ascii, line)
                    line = ligature_re.sub(expand_ligature, line)
                    line = fancyvrb_re.sub('id="fancyvrb"', line)
                    ofp.write(line)
    except BaseException:
        # Don't leave a stray temporary file behind on failure; the
        # original file has not been touched at this point.
        if os.path.exists(tmpname):
            os.remove(tmpname)
        raise
    os.rename(tmpname, name)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.