# Source
#
# palmagent / drupalbak.py
#
# Full commit
'''drupalbak.py -- backup drupal blog

goal: convert to hAtom

refs:

$result = xmlrpc('http://example.com/xmlrpc.php'...)
xmlrpc | Drupal API
http://api.drupal.org/api/drupal/includes--common.inc/function/xmlrpc/7

Example: Accessing a service from Python | drupal.org
http://drupal.org/node/231117

'''

import sys
import pprint
import xmlrpclib
from xml.sax.saxutils import escape as xmlchars


def main(argv, u='connolly', blogid='2',
         addr="http://dig.csail.mit.edu/breadcrumbs/xmlrpc.php"):
    pw = argv[1]

    b = DrupalBlog(addr, blogid, u, pw)

    posts = b.list_posts()

    print "<!DOCTYPE html><html>"
    print "<head><meta charset='utf-8'><title>blog title?</title></head>"
    print "<body>"

    print "<div><h2>Table of Contents</h2>"
    print "<ul>"
    for entry in posts:
        print "<li>", cite(entry).encode('utf-8'), "</li>"
    print "</ul>"
    print "</div>"

    print "<div><h2>Full Text</h2>"
    for entry in posts:
        print >> sys.stderr, "getting", entry['postid'], entry['title']
        try:
            e = b.get_post(entry['postid'])
        except ValueError:
            print >> sys.stderr, "ValueError!"
            continue

        print hentry(e).encode('utf-8')
    print "</div>"

    print "</body></html>"
    #_api(pw)


class DrupalBlog(object):
    '''Thin client for one blog reached through an XML-RPC endpoint.

    Keeps the server proxy together with the credentials and blog id so
    that callers only pass what varies per request.
    '''

    def __init__(self, addr, blogid, u, p):
        '''Remember blog id `blogid`, user `u`, password `p`; proxy `addr`.'''
        self._blogid = blogid
        self._u = u
        self._p = p
        self._s = xmlrpclib.ServerProxy(addr)

    def list_posts(self, qty=1000):
        '''Return the result of mt.getRecentPostTitles for up to `qty` posts.'''
        fetch_titles = self._s.mt.getRecentPostTitles
        return fetch_titles(self._blogid, self._u, self._p, qty)

    def get_post(self, postid):
        '''Return the result of metaWeblog.getPost for post `postid`.'''
        fetch_post = self._s.metaWeblog.getPost
        return fetch_post(postid, self._u, self._p)


def cite(entry):
    '''Return a short HTML citation for `entry`.

    The fragment links to the "#post_<postid>" anchor and shows the
    title, the creation date and the user id.  Every value of `entry`
    passes through _xmlvals() before it is substituted.
    '''
    fields = _xmlvals(entry)
    markup = '''
      <span>
        <cite class="entry-title">
          <a href="#post_%(postid)s">%(title)s</a>
        </cite>
        <br /><em class="published">%(dateCreated)s</em> by
        <strong class="author">%(userid)s</strong>
      </span>
      '''
    return markup % fields


def _xmlvals(entry):
    return dict([(k, xmlchars(unicode(v)))
                 for k, v in entry.iteritems()])
    
def hentry(entry):
    '''Render one post as an hAtom "hentry" HTML fragment.

    `entry` is a mapping that must provide the keys the template
    substitutes: postid, title, permaLink, dateCreated, userid and
    content.  mt_convert_breaks is optional and defaults to 0.

    All values except content are escaped through _xmlvals(); content
    is substituted without escaping.  When mt_convert_breaks is 1, line
    breaks in the content ("\\r\\n" or a bare "\\n") become "<br />".
    A pretty-printed dump of `entry` is embedded in an HTML comment.

    Raises KeyError when a required key is missing and ValueError when
    mt_convert_breaks cannot be converted to an integer.
    '''
    template = '''
      <div class="hentry" id="post_%(postid)s">
        <h3 class="entry-title">%(title)s</h3>
        <a rel="bookmark" class="published" href="%(permaLink)s"
        >%(dateCreated)s</a> by <strong class="author">%(userid)s</strong>
        <!--
        %(pprint)s
        -->
        <div class="entry-content">
        %(content)s
        </div>
      </div>
      '''

    content = entry['content']
    if int(entry.get('mt_convert_breaks', 0)) == 1:
        # Normalise "\r\n" first so that a bare "\n" is converted too.
        content = '<br />'.join(content.replace("\r\n", "\n").split("\n"))
    # um... what are the other convert_breaks values?
    # I think 4 = raw HTML
    # the other one that I used is 3. what is it?

    # The dump lives inside an HTML comment, so it must not contain "--":
    # a "-->" coming from the post data would end the comment early and
    # spill the rest of the dump into the page.  Repeat the replacement
    # because one pass leaves a pair behind in runs of three or more.
    dump = pprint.pformat(entry)
    while '--' in dump:
        dump = dump.replace('--', '- -')

    return template % dict(_xmlvals(entry),
                           content=content,
                           pprint=dump)


def _api(p, u='connolly'):
    s = xmlrpclib.Server("http://dig.csail.mit.edu/breadcrumbs/xmlrpc.php")
    methods = s.system.listMethods()
    print methods
    for m in methods:
        if not m.startswith('system.'):
            print m, s.system.methodHelp(m)
            print m, s.system.methodSignature(m)
            print


# Script entry point: hand the raw command line over to main().
if __name__ == '__main__':
    main(argv=sys.argv)