# Source: Adam Knight / milton
# Path: milton/drupal_support/management/commands/convertdrupal.py

import datetime
import logging
import re
from optparse import make_option

from django.db.models import Q
from django.contrib.sites.models import Site
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.core.management.base import BaseCommand, CommandError

from milton.drupal_support.models import *
from milton.models import Section, Page, Story, Redirect, ContentHistory, MiltonSite
from milton.templatetags.slugify import slugify

from milton.threadedcomments.models import MPTTComment

def vancode2int(vancode):
    """Decode one segment of a Drupal comment ``thread`` value to an integer.

    The first character of the segment is skipped and the remainder is parsed
    as a base-36 number (in Drupal's "vancode" encoding the leading character
    carries the length of the number, not part of its value).

    Returns ``None`` for an empty segment.  Raises ``ValueError`` if the part
    after the first character is not a valid base-36 number.
    """
    if vancode:
        return int(vancode[1:], 36)
    return None

class Command(BaseCommand):
    """Management command that converts Drupal 6 nodes into Milton objects.

    Drupal ``blog`` and ``story`` nodes become ``Story`` objects, ``page``
    nodes become ``Page`` objects.  For every imported node the command also
    creates ``Redirect`` rows for the old ``/node/<nid>`` URL and any Drupal
    URL aliases, a ``ContentHistory`` row per old revision field, and
    (optionally) the node's threaded comments as ``MPTTComment`` objects.

    NOTE(review): this class was restored from a copy in which several
    structural lines (``try:``/``else:``, closing parentheses, ``save()``
    calls) were missing.  The restored lines follow the surviving comments
    and indentation; confirm them against the upstream repository.
    """

    option_list = BaseCommand.option_list + (
        make_option('--comments', dest='comments', action="store_true", default=True, help="Import comments (default)."),
        make_option('--no-comments', dest='comments', action="store_false", default=True, help="Do not import comments."),
        make_option('--site-id', dest='site_id', action="store", help="Site ID to import records to."),
        make_option('--site-url', dest='site_url', action="store", help="Lookup an existing site by URL."),
    )
    help = 'Convert Drupal 6 tables to SP objects. The tables must be in the same database as SP.'

    def handle(self, *app_labels, **options):
        """Run the conversion.

        Options read from ``options``: ``comments`` (import comments, default
        True), ``traceback`` (print a traceback on failure), ``verbosity``
        (0, 1 or 2+), and ``site_id`` / ``site_url`` (destination site; one of
        them is required).

        Raises ``CommandError`` when no destination site is given, when no
        supported nodes are found, or when any other exception interrupts the
        import (the original error message is wrapped).
        """
        log = logging.getLogger("milton")
        import_comments = options.get('comments', True)
        show_traceback = options.get('traceback', False)
        verbose = int(options.get('verbosity', 0))
        debug = (verbose >= 2)

        # All imported content is owned by the user with primary key 1.
        user = User.objects.get(pk=1)

        # NOTE(review): the bodies of these two branches were missing; mapping
        # verbosity onto the logger level is the assumed intent.
        if debug:
            log.setLevel(logging.DEBUG)
        elif verbose:
            log.setLevel(logging.INFO)

        log.info("Starting Drupal conversion.")
        log.debug("* Importing data with %s as the content owner.", user.username)

        # Prefetch the destination site.  Start from None so that a missing
        # option yields the CommandError below instead of a NameError.
        site_obj = None
        if options.get("site_id", None):
            site_obj = MiltonSite.objects.get(pk=options.get("site_id"))
        elif options.get("site_url", None):
            site_obj = MiltonSite.objects.get_site_for_URL(options.get("site_url"))

        if not site_obj:
            raise CommandError("No site specified. Use either --site-id or --site-url to specify a site for the import.")

        log.info("* Using site %s", site_obj.name)

        try:
            # Get a list of nodes together and filter to core nodes.
            node_list = DrupalNode.objects.filter(type__in=("blog", "story", "page")).order_by("nid")
            log.info("Found %d nodes.", len(node_list))

            if len(node_list) == 0:
                raise CommandError("No supported Drupal nodes found in the current database.")

            # Create some sections
            (blog_section, created) = Section.objects.get_or_create(name="Blog", slug="blog")
            if created:
                log.debug("* Created blog section.")

            (story_section, created) = Section.objects.get_or_create(name="Story", slug="story")
            if created:
                log.debug("* Created story section.")

            blog_section = None  # I don't want to change the URLs for these.

            for node in node_list:
                # Our document object
                obj = None

                # See if we've imported this node before: every imported node
                # gets a "/node/<nid>" redirect, so its presence marks a repeat.
                node_aliases = Redirect.objects.filter(original='/node/%d' % node.nid, site=site_obj)
                if node_aliases.count() > 0:
                    log.info("Skipping node %d", node.nid)
                    continue

                # We haven't imported this.  Let's begin...
                log.info("Processing node (%d) %s", node.nid, node.title)

                if node.type == "blog" or node.type == "story":
                    if node.type == "blog":
                        log.debug("Node is a blog.")
                        section = blog_section
                    else:
                        log.debug("Node is a story.")
                        section = story_section

                    # Create a Story for the node
                    obj = Story(
                        user=user,
                        title=node.title,
                        date_created=datetime.datetime.fromtimestamp(node.created),
                        date_modified=datetime.datetime.fromtimestamp(node.changed),
                        date_published=datetime.datetime.fromtimestamp(node.created),
                        slug=slugify(node.title),
                        allow_comments=node.comment,
                        status=node.status,
                        section=section,
                    )

                    # Set the current content value
                    try:
                        contents = node.current_revision.get_parsed_contents()
                        obj.teaser = contents['teaser']
                        obj.content = contents['body']
                    except DrupalNodeRevision.DoesNotExist:
                        # obj has not been saved yet (no id), so report the
                        # Drupal node id instead.
                        log.info("WARNING: No body content found for %s (%d)", node.title, node.nid)

                    # Add to the site
                    obj.site = site_obj

                    # Save the story
                    obj.save()

                    # Import terms as tags
                    terms = node.current_revision.terms.all()
                    tags = [t.name for t in terms]
                    obj.tags = ','.join(tags) + ','
                    log.debug("* Set tags to: %s", obj.tags)

                    # Ensure the modification date is proper
                    obj.date_modified = datetime.datetime.fromtimestamp(node.changed)
                    obj.save()

                elif node.type == "page":
                    log.debug("Node is a page.")

                    # Create an object for the node
                    obj = Page(
                        title=node.title,
                        date_created=datetime.datetime.fromtimestamp(node.created),
                        date_modified=datetime.datetime.fromtimestamp(node.changed),
                        date_published=datetime.datetime.fromtimestamp(node.created),
                        status=node.status,
                    )

                    # Set the current content value
                    try:
                        obj.content = node.current_revision.body
                    except DrupalNodeRevision.DoesNotExist:
                        log.info("WARNING: No body content found for %s (%d)", node.title, node.nid)

                    # Add to the current site
                    obj.site = site_obj

                    # Save
                    obj.save()

                    # Find the most recent URL for this object and assign it
                    aliases = DrupalUrlAlias.objects.filter(src='node/%d' % node.nid).order_by('-pid')
                    if aliases.count():
                        log.debug("* Page aliases: %s", aliases)
                        obj.url = '/' + aliases[0].dst

                    # Ensure the modification date is proper
                    obj.date_modified = datetime.datetime.fromtimestamp(node.changed)
                    obj.save()

                else:
                    # Bail if we don't have something to work on
                    log.info("Unsupported type: %s", node.type)
                    continue

                # Lookup any URL aliases for this node and create redirects
                aliases = DrupalUrlAlias.objects.filter(src='node/%d' % node.nid)

                # First, the core "node/1" links should still work
                r = Redirect(original="/node/%d" % node.nid, target_object=obj, site=site_obj)
                r.save()

                # Now we get any others that were made
                for alias in aliases:
                    dst = "/" + alias.dst
                    # A Page already lives at its newest alias; don't redirect
                    # that URL to itself.
                    if obj.__class__ == Page and dst == obj.url:
                        continue
                    try:
                        r = Redirect(original=dst, target_object=obj, site=site_obj)
                        r.save()
                        log.debug(" Created redirect from %s", dst)
                    except Exception as e:
                        # Best effort: a failed redirect should not stop the import.
                        log.info("* Failed to create redirect from %s to %s :%s", dst, obj, e)

                # Create a ContentHistory for each old body value.  The first
                # revision in the list is skipped.
                if node.revisions.count() > 1:
                    for revision in node.revisions.all()[1:]:
                        date = datetime.datetime.fromtimestamp(revision.timestamp)
                        contents = revision.get_parsed_contents()

                        ch = ContentHistory(
                            date_created=date,
                            date_modified=date,
                            owner=obj,
                            field_name="teaser",
                            content=contents['teaser'],
                        )
                        ch.save()

                        ch = ContentHistory(
                            date_created=date,
                            date_modified=date,
                            owner=obj,
                            field_name="content",
                            content=contents['body'],
                        )
                        ch.save()

                        log.debug(" Added revision %s", ch.date_created)

                # Bring over the comments
                if import_comments:
                    comments = self.get_comments_for_node(node)
                else:
                    comments = []

                for comment in comments:
                    log.debug("  Importing comment: %s %s", comment.thread, comment.subject)

                    # To handle the nested comments:
                    #     Break up the thread property: 01.01.01 -> [1,1,1]
                    #     Pop off the last item, as that is the location of the current comment (which we cannot directly set)
                    #     For each remaining level, get the right relationship in a loop:
                    #         Set an object to the first listed comment.
                    #         Pop that value off the array.
                    #         If there're more items left, set the loop object to that child of the current object.
                    #         When we run out of list items, we found the parent.

                    # Drop the trailing character of the thread value, then
                    # split it: -> [01,00,05,07] -> [1,0,5,7]
                    parents = [vancode2int(part) for part in comment.thread[:-1].split('.')]

                    # The last entry addresses the comment itself: [1,0,5]
                    parents = parents[:-1]

                    if parents:
                        # The top-level index is shifted down by one so it can
                        # be used as a zero-based list index: [0,0,5]
                        parents[0] = parents[0] - 1

                    log.debug("** Parents: %s (%s)", parents, comment.thread)

                    c_obj = None
                    if parents:
                        ct = ContentType.objects.get_for_model(obj.__class__)
                        obj_comments = MPTTComment.objects.filter(content_type=ct, object_pk=str(obj.id)).order_by('submit_date')

                        if debug:
                            log.debug("* Object comments (%d): %s", obj_comments.count(), obj_comments)
                            log.debug("* Parents (%d): %s", len(parents), parents)

                        try:
                            c_obj = obj_comments[parents[0]]
                        except IndexError:
                            log.info("*** Parent of comment subtree not found.  This can happen if a thread has a deleted comment.")

                        parents = parents[1:]

                        # Walk down the remaining levels to the direct parent.
                        while c_obj and parents:
                            if c_obj.children.count() > parents[0]:
                                c_obj = c_obj.children.all()[parents[0]]
                                parents = parents[1:]
                            else:
                                log.info("** Object has %d comments, but the parent should be item %d", c_obj.children.count(), parents[0])
                                # NOTE(review): stop descending and attach to the
                                # closest ancestor found; without an exit here the
                                # loop never terminates.  Confirm intended fallback.
                                break

                        if debug and c_obj:
                            log.debug("** Suspected parent: %s", c_obj.title)

                    # Actually create the comment now
                    c = MPTTComment(
                        content_object=obj,
                        title=unicode(comment.subject)[:200],
                        ip_address=unicode(comment.hostname)[:15],
                        user_name=unicode(comment.name)[:50],
                        user_email=unicode(comment.mail)[:75],
                        user_url=unicode(comment.homepage)[:200],
                        submit_date=datetime.datetime.fromtimestamp(comment.timestamp),
                        comment=unicode(comment.comment),
                        site=Site.objects.get_current(),
                        is_public=(not comment.status),
                    )
                    c.save()

                    # Now that the comment exists, we can set the parent.
                    c.parent = c_obj
                    c.save()

                    log.info(" Imported comment (%d) %s", comment.cid, comment.subject)

                log.debug(" Finished node: %s (%s)", obj.title, obj.get_absolute_url())

        except Exception as e:
            if show_traceback:
                import traceback
                traceback.print_exc()
            raise CommandError("Error: %s" % e)

    def get_comments_for_node(self, node):
        """Return the Drupal comments of ``node`` as a list, ordered by thread.

        This generally requires a "real" database like Postgresql or MySQL.  Sqlite does not support SUBSTRING.
        But, that's okay.  Drupal required MySQL anyway, so do the conversion on a copy of the DB there, then
        migrate to whatever else you want to use.

        The query runs against the ``drupal`` database alias and sorts on the
        thread value with its last character removed.
        """
        raw_comments = DrupalComment.objects.db_manager('drupal').raw("SELECT * FROM comments WHERE nid = %s ORDER BY SUBSTRING(comments.thread, 1, (LENGTH(comments.thread) - 1))", [node.nid])

        # Materialize the raw queryset so callers get a plain list.
        comments = []
        for comment in raw_comments:
            comments.append(comment)

        return comments