David Chambers committed c3bff0c

Updated WordPress conversion script to use new excerpt formatting.

Comments (0)

Files changed (1)

 import mango.settings
 from mango.utils import lstrip, posts_directory, unescape
 
+html2text.BODY_WIDTH = mango.settings.BODY_WIDTH
+delim = u' '.join(['*'] * int(round(html2text.BODY_WIDTH / 2))) + u'\n'
+
 # assign arguments to variables
 try:
     blog_url, username, password = sys.argv[1:]
     sys.stderr.write('Failed to find blog. Double-check supplied arguments.\n')
     sys.exit(1)
 
+convert = lambda html: html2text.html2text(html).strip() + u'\n'
+
 posts_directory()  # create posts directory if necessary
 
 pages = server.wp.getPages(blog_id, username, password)
 
 for page in pages:
-    html2text.BODY_WIDTH = mango.settings.BODY_WIDTH
     with open(os.path.join(mango.settings.DOCUMENTS_PATH, '%s.text' % page['wp_slug']), 'w') as f:
-        f.write(u'author: %s\n\n\n%s\n%s\n\n%s\n' % (
-                page['wp_author_display_name'],
-                page['title'], '=' * len(page['title']),
-                html2text.html2text(page['description']).strip()))
+        f.write(u'author: %s\n\n\n# %s\n\n%s' %
+                (page['wp_author_display_name'], page['title'],
+                 convert(page['description'])))
 
 posts = server.metaWeblog.getRecentPosts(blog_id, username, password, 0)
 
 for post in posts:
 
-    lines = []
+    text = u''
 
     dt = datetime.strptime(post['date_created_gmt'].value, '%Y%m%dT%H:%M:%S')
     dt = dt.replace(tzinfo=pytz.utc).astimezone(pytz.timezone(TIME_ZONE))
-    lines.append(u'date: %s' % lstrip(dt.strftime(mango.settings.MARKDOWN_DATE_FORMAT)))
-    lines.append(u'time: %s' % lstrip(dt.strftime(mango.settings.MARKDOWN_TIME_FORMAT)).lower())
-    lines.append(u'zone: %s' % TIME_ZONE)
-    lines.append(u'author: %s' % post['wp_author_display_name'])
+    text += u'date: %s\n' % lstrip(dt.strftime(mango.settings.MARKDOWN_DATE_FORMAT))
+    text += u'time: %s\n' % lstrip(dt.strftime(mango.settings.MARKDOWN_TIME_FORMAT)).lower()
+    text += u'zone: %s\n' % TIME_ZONE
+    text += u'author: %s\n' % post['wp_author_display_name']
 
     if post['mt_keywords']:
-        lines.append(u'tags: %s' % unescape(post['mt_keywords']))
+        text += u'tags: %s' % unescape(post['mt_keywords'])
 
     if post['mt_excerpt']:  # hand-crafted excerpt
-        lines += ['', '']
-        html2text.BODY_WIDTH = mango.settings.BODY_WIDTH - 2
-        for line in html2text.html2text(post['mt_excerpt']).strip().splitlines():
-            lines.append(u'| %s' % line)
+        text += u'\n\n%s' % convert(post['mt_excerpt'])
 
-    lines += ['', '', post['title'], '=' * len(post['title']), '']
+    text += u'\n\n# %s\n\n' % post['title']
 
-    html2text.BODY_WIDTH = mango.settings.BODY_WIDTH
-    lines.append(html2text.html2text(post['description']).strip())
+    # everything before the <!--more-->, or the entire post body
+    excerpt = convert(post['description'])
+    # everything after the <!--more-->, or nothing
+    rest = post['mt_text_more']
+    text += u'\n'.join([delim, excerpt, delim, convert(rest)]) if rest else excerpt
 
-    if post['mt_text_more']:  # <!--more-->
-        lines.append('')
-        html2text.BODY_WIDTH = mango.settings.BODY_WIDTH - 2
-        for line in html2text.html2text(post['mt_text_more']).strip().splitlines():
-            lines.append(u'| %s' % line)
+    path = os.path.join(mango.settings.DOCUMENTS_PATH, post['wp_slug'] + '.text')
+    with open(path, 'w') as f:
+        f.write(text)
 
-    with open(os.path.join(mango.settings.DOCUMENTS_PATH, '%s.text' % post['wp_slug']), 'w') as f:
-        f.write(u'%s\n' % '\n'.join(lines))
-
-sys.stdout.write('Successfully imported %s %s and %s %s.\n' % (
-        len(pages), len(pages) == 1 and 'page' or 'pages',
-        len(posts), len(posts) == 1 and 'post' or 'posts'))
+sys.stdout.write('Successfully imported %s %s and %s %s.\n' %
+                 (len(pages), len(pages) == 1 and 'page' or 'pages',
+                  len(posts), len(posts) == 1 and 'post' or 'posts'))
 sys.exit()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.