Commits

David Chambers committed 180b176

Made Blogger conversion script more consistent with the WordPress one.

  • Participants
  • Parent commits c3bff0c
  • Branches 0.8

Comments (0)

Files changed (1)

File extras/blogger.py

 import mango.settings
 from mango.utils import lstrip, posts_directory
 
+html2text.BODY_WIDTH = mango.settings.BODY_WIDTH
+
 TZ_OFFSET = re.compile(r'(?P<sign>[+-])(?P<hours>\d\d):?(?P<minutes>\d\d)$')
 LINE_BREAKS = re.compile(r'\r\n?|<br( ?/)?>')
 BLOGGER_POST_FOOTER = re.compile(r'<div class="blogger-post-footer">.*?</div>')
 data = connection.read()
 connection.close()
 
-posts_directory() # create posts directory if necessary
+convert = lambda html: html2text.html2text(html).strip() + u'\n'
+
+posts_directory()  # create posts directory if necessary
 
 entries = simplejson.loads(data)['feed']['entry']
 
 for entry in entries:
 
-    lines = []
+    text = u''
 
     timestamp = entry['published']['$t']
 
 
     dt = datetime.strptime(timestamp[:19], '%Y-%m-%dT%H:%M:%S')
     dt = dt.replace(tzinfo=TZ(offset)).astimezone(pytz.timezone(TIME_ZONE))
-    lines.append(u'date: %s' % lstrip(dt.strftime(mango.settings.MARKDOWN_DATE_FORMAT)))
-    lines.append(u'time: %s' % lstrip(dt.strftime(mango.settings.MARKDOWN_TIME_FORMAT)).lower())
-    lines.append(u'zone: %s' % TIME_ZONE)
-    lines.append(u'author: %s' % entry['author'][0]['name']['$t'])
+    text += u'date: %s\n' % lstrip(dt.strftime(mango.settings.MARKDOWN_DATE_FORMAT))
+    text += u'time: %s\n' % lstrip(dt.strftime(mango.settings.MARKDOWN_TIME_FORMAT)).lower()
+    text += u'zone: %s\n' % TIME_ZONE
+    text += u'author: %s\n' % entry['author'][0]['name']['$t']
 
     tags = entry.get('category')
     if tags:
         tags = sorted([smart_unicode(tag['term']) for tag in tags], key=unicode.lower)
-        lines.append(u'tags: %s' % ', '.join(tags))
+        text += u'tags: %s\n' % ', '.join(tags)
 
-    title = entry['title']['$t']
-    lines += ['', '', title, '=' * len(title), '']
+    text += u'\n\n# %s\n\n' % entry['title']['$t']
 
     # tidy up Blogger's mess
     content = re.sub(LINE_BREAKS, '\n', entry['content']['$t'])
     content = re.sub(BLOGGER_POST_FOOTER, '', content)
 
-    # convert HTML to Markdown
-    html2text.BODY_WIDTH = mango.settings.BODY_WIDTH
-    content = html2text.html2text(content).strip()
-
-    lines.append(content)
+    text += convert(content)
 
     for link in entry['link']:
         if link['rel'] == 'alternate':
             if slug.endswith('.html'):
                 slug = slug[:-5]
             break
-    else: # create slug from post title
+    else:  # create slug from post title
         slug = re.sub(r'[^a-z0-9_-]', '', title.lower())
         slug = re.sub(r'\s+', '-', slug)
 
-    with open(os.path.join(mango.settings.DOCUMENTS_PATH, '%s.text' % slug), 'w') as f:
-        f.write(u'%s\n' % '\n'.join(lines))
+    path = os.path.join(mango.settings.DOCUMENTS_PATH, slug + '.text')
+    with open(path, 'w') as f:
+        f.write(text)
 
-sys.stdout.write('Successfully imported %s %s.\n' % (
-        len(entries), len(entries) == 1 and 'document' or 'documents'))
+sys.stdout.write('Successfully imported %s %s.\n' %
+                 (len(entries), len(entries) == 1 and 'document' or 'documents'))
 sys.exit()