Source

pypi / utils / charset.py

Full commit
import string
import re

# Alphanumeric ASCII alphabet: the ASCII letters followed by the digits 0-9.
chars = "".join((string.ascii_letters, string.digits))

# Poor man's markup heuristics so we don't have to use <PRE>,
# for when reStructuredText rendering didn't work on the text.
#
# Note: these pattern literals are not raw strings, so the "\r" and "\n"
# below are actual control characters inside the pattern.

# A line break (with any trailing spaces before it, and an optional carriage
# return on either side of the newline) that is followed by at least one
# space, i.e. the next line is indented. The lookahead leaves the indentation
# itself unconsumed.
br_patt = re.compile(" *\r?\n\r?(?= +)")
# A line break (with any trailing spaces before it) followed by one or more
# further line breaks, i.e. a run of one or more empty lines.
p_patt = re.compile(" *\r?\n(\r?\n)+")

def newline_to_br(text):
    """Insert simple HTML break/paragraph markup into plain text.

    Every match of ``br_patt`` in *text* is replaced by ``<br />`` first;
    every match of ``p_patt`` in that result is then replaced by a ``<p>``
    tag on its own line. No other transformation (such as HTML escaping)
    is applied here.

    Returns the rewritten text.
    """
    with_breaks = br_patt.sub("<br />", text)
    return p_patt.sub("\n<p>\n", with_breaks)

def path2str(path):
    """Join the string nodes of *path* into one ``" :: "``-separated string.

    *path* is any iterable of strings; an empty iterable yields ``""``.
    """
    separator = " :: "
    return separator.join(path)

def str2path(s):
    """Split *s* on ``"::"`` and return the pieces as a list.

    Leading and trailing whitespace is stripped from each piece. A string
    without any ``"::"`` yields a single-element list.
    """
    nodes = []
    for piece in s.split("::"):
        nodes.append(piece.strip())
    return nodes

def utf8getter(n):
    """Build an accessor that returns item *n* of a record as decoded text.

    The returned callable takes one argument, ``fields`` (anything that
    supports ``fields[n]``), and returns:

    * ``fields[n]`` decoded as UTF-8 when it is a byte string; undecodable
      bytes are substituted rather than raising (``'replace'`` error mode);
    * ``fields[n]`` unchanged otherwise -- this covers ``None`` and text
      that has already been decoded.
    """
    def utf8get(fields):
        value = fields[n]
        # Only byte strings need decoding. Calling .decode() on text that is
        # already decoded either does not exist (Python 3 str) or triggers
        # an implicit ASCII encode that fails on non-ASCII text (Python 2
        # unicode), so such values are passed through untouched.
        if isinstance(value, bytes):
            return value.decode('utf-8', 'replace')
        return value

    return utf8get