# Source: milton/drupal_support/management/commands/convertdrupal.py

import datetime
import re
from optparse import make_option

from django.db.models import Q
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.contrib.sites.models import Site
from django.core.management.base import BaseCommand, CommandError

from milton.drupal_support.models import *
from milton.models import *
from milton.templatetags.slugify import slugify

from milton.threadedcomments.models import MPTTComment

def vancode2int(vancode):
    """Decode one segment of a comment thread code into an integer.

    The first character of ``vancode`` is skipped and the remaining
    characters are parsed as a base-36 number.  An empty string yields
    ``None``.
    """
    if len(vancode) == 0:
        return None
    # Everything after the leading character is the base-36 payload.
    digits = vancode[1:]
    return int(digits, 36)

class Command(BaseCommand):
    option_list = BaseCommand.option_list + (
    #     make_option('--format', default='json', dest='format', help='Specifies the output serialization format for fixtures.'),
    #     make_option('--indent', default=None, dest='indent', type='int', help='Specifies the indent level to use when pretty-printing output'),
    #     make_option('-e', '--exclude', dest='exclude',action='append', default=[], help='App to exclude (use multiple --exclude to exclude multiple apps).'),
    #     make_option('-v', '--verbose', dest='verbose', action="store", default="0", type="choice", choices=['0','1','2'], help='Verbose output'),
        make_option(None,'--comments', dest='comments', action="store_true", default=True, help="Import comments (default)."),
        make_option(None,'--no-comments', dest='comments', action="store_false", default=True, help="Do not import comments."),
    )
    help = 'Convert Drupal 6 tables to SP objects.    The tables must be in the same database as SP.'
    # args = '[appname ...]'
    
    def handle(self, *app_labels, **options):
	import_comments = options.get('comments', True)
        show_traceback = options.get('traceback', False)
        verbose = int(options.get('verbosity', 0))
        debug = (verbose == 2)
        user = User.objects.get(pk=1)
        
        if verbose: print "Starting Drupal conversion."
        if debug: print "* Debug logging enabled."
        if debug: print "* Importing data with %s as the content owner." % user.username
        
        try:
            # Get a list of nodes together and filter out blog nodes.
            node_list = DrupalNode.objects.filter(type__in=("blog", "story", "page")).order_by("nid")
            if verbose: print "Found %d nodes." % len(node_list)
            
            if len(node_list) == 0:
                raise CommandError("No Drupal nodes found in the current database.")
            
            # Create some sections
            (blog_section, c) = Section.objects.get_or_create(name="Blog", slug="blog")
            if c:
                if debug: print "* Created blog section."
                blog_section.save()
            
            (story_section, c) = Section.objects.get_or_create(name="story", slug="story")
            if c:
                if debug: print "* Created story section."
                story_section.save()
    
            blog_section = None # I don't want to change the URLs for these.
    
            for node in node_list:
                # Our document object
                obj = None
                
                # See if we've imported this node before
                node_aliases = Redirect.objects.filter(original='/node/%d' % node.nid)
                if node_aliases.count() > 0:
                    if verbose: print "Skipping node %d" % node.nid
                    continue
                
                # We haven't imported this.  Let's begin...
                if verbose: print "Processing node (%d) %s" % (node.nid, node.title)
                
                if node.type == "blog" or node.type == "story":
                    if node.type == "blog":
                        if debug: print "Node is a blog."
                        section = blog_section
                    else:
                        if debug: print "Node is a story."
                        section = story_section
                    
                    # Create an Story for the node
                    obj = Story(
                        user = user,
                        title = node.title,
                        date_created = datetime.datetime.fromtimestamp(node.created),
                        date_modified = datetime.datetime.fromtimestamp(node.changed),
                        date_published = datetime.datetime.fromtimestamp(node.created),
                        slug = slugify(node.title),
                        allow_comments = node.comment,
                        status = node.status,
                        section = section,
                        )
                
                    # Set the current content value
                    try:
                        contents = node.current_revision.get_parsed_contents()
                    
                        obj.teaser = contents['teaser']
                        obj.content = contents['body']
                    except DrupalNodeRevision.DoesNotExist, e:
                        if verbose: print "WARNING: No body content found for %s (%d)" % (obj.title, obj.id)
                
                    # Save the story
                    obj.save()
                    
                    # Add to the current site
                    obj.sites.add(Site.objects.get_current())
                    
                    # Import terms as tags
                    terms = node.current_revision.terms.all()
                    tags = []
                    for term in terms:
                        tags.append(term.name)
                    obj.tags = ','.join(tags) + ','
                    if debug: print "* Set tags to: %s" % obj.tags
                    
                    # Ensure the modification date is proper
                    obj.date_modified = datetime.datetime.fromtimestamp(node.changed)
                    obj.save()
                    
                elif node.type == "page":
                    if debug: print "Node is a page."
                    
                    # Create an object for the node
                    obj = Page(
                        title = node.title,
                        date_created = datetime.datetime.fromtimestamp(node.created),
                        date_modified = datetime.datetime.fromtimestamp(node.changed),
                        date_published = datetime.datetime.fromtimestamp(node.created),
                        status = node.status,
                        )
                    
                    # Set the current content value
                    try:
                        obj.content = node.current_revision.body
                    except DrupalNodeRevision.DoesNotExist, e:
                        if verbose: print "WARNING: No body content found for %s (%d)" % (obj.title, obj.id)
                    
                    # Save
                    obj.save()
                    
                    # Add to the current site
                    obj.sites.add(Site.objects.get_current())
                    
                    # Find the most recent URL for this object and assign it
                    aliases = DrupalUrlAlias.objects.filter(src='node/%d' % node.nid).order_by('-pid')
                    if aliases.count():
                        if debug: print "* Page aliases:", aliases
                        obj.url = '/' + aliases[0].dst
                    
                    # Ensure the modification date is proper
                    obj.date_modified = datetime.datetime.fromtimestamp(node.changed)
                    obj.save()
                
                else:
                    # Bail if we don't have something to work on
                    if verbose: print "Unsupported type:", node.type
                    continue
                
                # Lookup any URL aliases for this node and create redirects
                aliases = DrupalUrlAlias.objects.filter(src='node/%d' % node.nid)
                
                # First, the core "node/1" links should still work
                r = Redirect(original="/node/%d" % node.nid, target_object=obj)
                r.save()
                
                # Now we get any others that were made
                for alias in aliases:
                    dst = "/" + alias.dst
                    if obj.__class__ == Page and dst == obj.url: continue
                    try:
                        r = Redirect(original=dst, target_object=obj)
                        r.save()
                        if debug: print " Created redirect from", dst
                    except Exception, e:
                        if verbose: print "* Failed to create redirect from", dst, "to", obj, ":", e
                        continue
                
                # Create a ContentHistory for each old body value
                if node.revisions.count() > 1:
                    for revision in node.revisions.all()[1:]:
                        date = datetime.datetime.fromtimestamp(revision.timestamp)
                        
                        contents = revision.get_parsed_contents()
                        
                        ch = ContentHistory(
                            date_created = date,
                            date_modified = date,
                            owner = obj,
                            field_name = "teaser",
                            content = contents['teaser'],
                        )
                        ch.save()
                        
                        ch = ContentHistory(
                            date_created = date,
                            date_modified = date,
                            owner = obj,
                            field_name = "content",
                            content = contents['body'],
                        )
                        ch.save()
                        
                        if debug: print " Added revision %s" % (ch.date_created,)
                
                # Bring over the comments
                if import_comments:
                    comments = self.get_comments_for_node(node)
                else:
                    comments = []
                
                # if debug: print "  Comments:", comments.count()
                for comment in comments:
                    if debug: print "  Importing comment: ", comment.thread, comment.subject
                    
                    # To handle the nested comments:
                    #     Break up the thread property: 01.01.01 -> [1,1,1]
                    #     Pop off the last item, as that is the location of the current comment (which we cannot directly set)
                    #     For each remaining level, get the right relationship in a loop:
                    #         Set an object to the first listed comment.
                    #         Pop that value off the array.
                    #         If there're more items left, set the loop object to that child of the current object.
                    #         When we run out of list items, we found the parent.
                    
                    # 01.00.05.07/ to 01.00.05.07 to [01,00,05,07]
                    parents = comment.thread[:-1].split('.')
                    # [1,0,5,7]
                    parents = map(vancode2int, parents)
                    # [1,0,5]
                    parents = parents[:-1]
                    if len(parents):
                        # [0,0,5]
                        parents[0] = parents[0] - 1
                    
                    if debug: print "** Parents:", parents, "(%s)" % comment.thread
                    
                    c_obj = None
                    if len(parents):
                        ct = ContentType.objects.get_for_model(obj.__class__)
                        obj_comments = MPTTComment.objects.filter(content_type=ct, object_pk=str(obj.id)).order_by('submit_date')
                        if debug:
                            print "* Object comments (%d):" % obj_comments.count(), obj_comments
                            print "* Parents (%d):" % len(parents), parents
                        try:
                            c_obj = obj_comments[parents[0]]
                        except IndexError, e:
                            if verbose: print "*** Parent of comment subtree not found.  This can happen if a thread has a deleted comment."
                        parents = parents[1:]
                        while c_obj and len(parents):
                            if c_obj.children.count() > parents[0]:
                                c_obj = c_obj.children.all()[parents[0]]
                                parents = parents[1:]
                            else:
                                if verbose: print "** Object has %d comments, but the parent should be item %d" % (c_obj.children.count(), parents[0])
                                break
                        if debug and c_obj: print "** Suspected parent:", c_obj.title
                    
                    # Actually create the comment now
                    c = MPTTComment(
                        content_object = obj,
                        title = unicode(comment.subject)[:200],
                        ip_address = unicode(comment.hostname)[:15],
                        user_name = unicode(comment.name)[:50],
                        user_email = unicode(comment.mail)[:75],
                        user_url = unicode(comment.homepage)[:200],
                        submit_date = datetime.datetime.fromtimestamp(comment.timestamp),
                        comment = unicode(comment.comment),
                        site = Site.objects.get_current(),
                        is_public = (not comment.status),
                    )
                    c.save()
                    
                    # Now that the comment exists, we can set the parent.
                    c.parent = c_obj
                    c.save()
                    
                    if verbose: print " Imported comment (%d) %s" % (comment.cid, comment.subject)
                
                if debug: print " Finished node: %s (%s)" % (obj.title, obj.get_absolute_url())
            
        except Exception, e:
            if show_traceback:
                raise
            raise CommandError("Error: %s" % e)
        
        if verbose: print "Done."
    
    def get_comments_for_node(self, node):
        '''
        This generally requires a "real" database like Postgresql or MySQL.  Sqlite does not support SUBSTRING.
        But, that's okay.  Drupal required MySQL anyway, so do the conversion on a copy of the DB there, then
        migrate to whatever else you want to use.
        '''
        raw_comments = DrupalComment.objects.db_manager('drupal').raw("SELECT * FROM comments WHERE nid = %s ORDER BY SUBSTRING(comments.thread, 1, (LENGTH(comments.thread) - 1))", [node.nid])
        comments = []
        for comment in raw_comments:
            comments.append(comment)
        return comments