Source

articledump / articledump / restructuredtext.py

from docutils import io
from docutils.core import publish_doctree, Publisher
from docutils.readers.doctree import Reader
from docutils.nodes import docinfo
from datetime import datetime
from logbook import Logger

# Module-level logbook logger for this reStructuredText loader.
# NOTE(review): the channel name reads 'archivedump' while the file path
# header reads 'articledump' -- confirm which spelling is intended.
log = Logger('archivedump rst loader')

def get_docinfo(docinfo, name, astext=True):
    """Look up a single entry in a docinfo-like sequence of nodes.

    Entries are scanned in order and the first match wins. An entry
    matches when its ``tagname`` equals *name*; a generic ``field`` entry
    matches when the text of its first child equals *name*, in which case
    the field's second child is the result.

    :param docinfo: iterable of nodes exposing ``tagname``, ``astext()``
        and index access.
    :param name: tag name or field name to search for.
    :param astext: when true (the default) return the matched node's
        ``astext()`` value, otherwise return the node itself.
    :returns: the match as described above, or ``None`` if nothing matches.
    """
    for entry in docinfo:
        if entry.tagname == name:
            match = entry
        elif entry.tagname == 'field' and entry[0].astext() == name:
            # Generic fields store (name, body); the body is the value.
            match = entry[1]
        else:
            continue
        if match is None:
            return None
        return match.astext() if astext else match
    return None

class Info(object):
    """Metadata accessors and HTML rendering for one parsed document.

    Attributes set by the constructor:

    * ``document`` -- the docutils document tree.
    * ``source`` -- the object the document was read from, stored as given.
    * ``root`` -- the root the source belongs to, stored as given.
    * ``docinfo`` -- the document's direct ``docinfo`` child, or an empty
      ``docinfo`` element when the document has none.
    """

    def __init__(self, document, source, root):
        """Store the arguments and locate the document's docinfo node.

        :param document: docutils document tree (must expose ``children``).
        :param source: origin of the document; stored untouched.
        :param root: root location of the source; stored untouched.
        """
        self.document = document
        self.source = source
        self.root = root
        # A document without bibliographic fields has no docinfo child.
        # Fall back to an empty element so the metadata properties simply
        # report "not set" instead of construction dying on StopIteration.
        found = next(
            (child for child in document.children
             if isinstance(child, docinfo)),
            None)
        self.docinfo = found if found is not None else docinfo()
        # Diagnostic output goes through the module logger, not stdout.
        log.debug('docinfo fields: {0!r}', list(self.docinfo))

    @property
    def title(self):
        """Return the document's ``title`` attribute.

        Raises ``KeyError`` when the document carries no title.
        """
        return self.document['title']

    @property
    def slug(self):
        """Return the ``slug`` docinfo field, else the document's first id.

        The fallback is also used when the field is present but empty.
        Raises ``IndexError`` if the fallback is needed and the document
        has no ids.
        """
        return get_docinfo(self.docinfo, 'slug') or self.document['ids'][0]

    @property
    def date(self):
        """Return the docinfo ``date`` as a ``datetime.date``, or ``None``.

        The value must be formatted as ``YYYY-MM-DD``; any other non-empty
        value raises ``ValueError`` from ``strptime``.
        """
        raw = get_docinfo(self.docinfo, 'date')
        if not raw:
            return None
        return datetime.strptime(raw, '%Y-%m-%d').date()

    def build_html_parts(self):
        """Render the stored document with the ``html`` writer.

        :returns: a copy of the writer's ``parts`` mapping with the
            ``whole`` and ``stylesheet`` entries removed.
        """
        # The document is already a doctree, so the doctree reader is
        # paired with the 'null' parser rather than a real one.
        reader = Reader(parser_name='null')
        pub = Publisher(reader, None, None,
                        source=io.DocTreeInput(self.document),
                        destination_class=io.StringOutput)
        pub.set_writer('html')
        pub.process_programmatic_settings(None, None, None)
        pub.set_destination(None, None)
        pub.publish()
        # Copy before trimming so the writer's own mapping stays intact.
        parts = pub.writer.parts.copy()
        del parts['whole']
        del parts['stylesheet']
        return parts




def load_rst(file, root):
    """Parse a reStructuredText file and wrap the result in ``Info``.

    :param file: object providing ``read()`` that returns the reST text.
        It is also handed to ``root.bestrelpath`` -- presumably a
        py.path-style path object; confirm against callers.
    :param root: object providing ``bestrelpath(file)``; the result is
        passed to docutils as the document's source path.
    :returns: an ``Info`` built from the parsed doctree, *file* and *root*.
    """
    text = file.read()
    relative_path = root.bestrelpath(file)
    doctree = publish_doctree(text, source_path=relative_path)
    return Info(doctree, file, root)