Commits

Marcin Kasperski committed cdf4c90

First version. On my small blog it seems to work properly.

Comments (0)

Files changed (3)

+Scripts implemented here handle export from the Serendipity
+(http://s9y.org) PHP blog to the Blogofile (http://www.blogofile.com)
+static blog generator.
+

serendipity2blogofile.py

+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Author: Marcin Kasperski
+#
+# Heavily inspired by wordpress2blogofile.py from blogofile contrib,
+# but also handles static pages and some config elements.
+
+"""Export a Serendipity blog to Blogofile /_posts directory format
+
+This file is MIT licensed, see http://blogofile.com/LICENSE.html for details.
+
+Requirements:
+
+  * An existing Serendipity database hosted on PostgreSQL
+
+  * SQLAlchemy and Postgres driver
+
+Usage:
+
+  * Edit the database connection details and the other config elements below
+
+  * Execute
+
+    cd <... directory where output is to be created ...>
+    python /path/to/serendipity2blogofile.py
+
+    If everything worked right, this will create (in the current dir)
+    a:
+       _posts directory with blog posts
+       pages directory with static pages
+       _config_extra.py file with a few items to be copied to _config.py
+
+Important notes:
+
+   1. The script ignores permissions, groups, images, comments, etc.
+      (for comments see serendipity2disqus).  Only posts (with their
+      text, categories and tags) and static pages are exported.
+
+   2. Extended entries are separated from leading text by
+
+      <!--more-->
+
+      marker.  Use appropriate excerpt generating function to make use
+      of those.
+"""
+
+###########################################################################
+# Configuration requiring edits
+###########################################################################
+
+# Database connection details
+
+table_prefix = "s9en_"
+
+db_username  = "blogowner"
+db_password  = ""                    
+db_host      = "linode.mekk.waw.pl"  
+db_port      = "5432"
+db_database  = "BLOG" 
+db_conn      = "postgres://{db_username}:{db_password}@{db_host}:{db_port}/{db_database}".format(**locals())
+
+# Other config
+
+# Extension to use (should map to syntax used in blog or at least it's
+# majority)
+FILENAME_EXTENSION = "markdown"
+
+# Custom entry properties to be saved. List of pairs where
+# left is the serendipity name of custom property and right is
+# the name to be used in blogofile YAML
+CUSTOM_ENTRY_PROPERTIES = (
+    ('BodyClass', 'body_class'),
+    )
+
+###########################################################################
+# Importer code. No need to edit anything below
+###########################################################################
+
+import os
+import re
+import sys
+import yaml
+import codecs
+import datetime
+import sqlalchemy as sa
+import sqlalchemy.orm as orm
+from sqlalchemy.ext.declarative import declarative_base
+
+###########################################################################
+# SQLAlchemy objects
+###########################################################################
+
+# Engine for the Serendipity database described by db_conn
+engine = sa.create_engine(db_conn)
+# Session factory bound to that engine, with autocommit and autoflush
+# both switched off
+Session = orm.scoped_session(
+    orm.sessionmaker(autocommit=False,
+                     autoflush=False,
+                     bind=engine))
+# Declarative base bound to the engine -- presumably this is what lets
+# the 'autoload' mappers below reflect their tables (confirm against the
+# SQLAlchemy version in use)
+Base = declarative_base(bind=engine)
+
+# Module-wide session used by the database helper functions
+session = Session()
+
+###########################################################################
+# Mapper objects
+###########################################################################
+
+class Author(Base):
+    """
+    Author information, mapped onto the <table_prefix>authors table.
+
+    The table definition is loaded from the database ('autoload').
+    Interesting fields:
+
+    authorid (numerical id)
+    username (nick)
+    realname (full true name)
+    email
+    """
+    __tablename__ = table_prefix + "authors"
+    __table_args__ = {'autoload': True}
+    
+
+class Category(Base):
+    """
+    Category information, mapped onto the <table_prefix>category table.
+
+    The table definition is loaded from the database ('autoload').
+    Interesting fields:
+
+    category_id (numerical)
+    category_name
+    category_description
+    category_left, category_right, parentid (ordering, hierarchy)
+
+    NOTE(review): the EntryCat foreign key refers to this table's id
+    column as 'categoryid', so the 'category_id' spelling above may be
+    inaccurate -- confirm against the actual schema.
+    """
+    __tablename__ = table_prefix + "category"
+    __table_args__ = {'autoload': True}
+
+class Config(Base):
+    """
+    Configuration table. Has fields name and value (and authorid but
+    who cares)
+
+    Interesting properties (names)
+
+    blogTitle, blogDescription, blogMail, lang, baseURL, 
+
+    permalinkStructure, permalinkAuthorStructure,
+    permalinkCategoryStructure, permalinkFeedCategoryStructure,
+    permalinkArchivePath, permalinkArchivesPath, permalinkFeedsPath
+
+    username, realname, email
+    """
+    __tablename__ = table_prefix + "config"
+    __table_args__ = {'autoload': True}
+    # 'name' is declared explicitly as the primary key, which is what
+    # get_config_item() relies on when it looks items up by name
+    # (presumably the reflected table defines no primary key itself).
+    name = sa.Column("name", sa.String, primary_key = True)
+
+
+class Permalink(Base):
+    """
+    Entries and categories permalinks
+
+    permalink  (relative, for example "archives/44-My-Article.html")
+    entry_id   (numerical)
+    type       ("entry" or "category")
+
+    Note: as we don't use category permalinks, I haven't fought
+    with SQLAlchemy to polymorphically make entry_id a foreign
+    key to either entries or categories
+    """
+    __tablename__ = table_prefix + "permalinks"
+    __table_args__ = {'autoload': True}
+    # Declared by hand as primary key and as foreign key to the entries
+    # table; Entry.permalink_rel joins on it (restricted to type 'entry').
+    entry_id = sa.Column("entry_id", sa.Integer,
+                         sa.ForeignKey(table_prefix + "entries.id"),
+                         primary_key = True)
+
+class EntryCat(Base):
+    """
+    Entry-Category connection (association table).
+
+    Database columns:
+
+    entryid
+    categoryid
+
+    Its table is used as the 'secondary' table of Entry.category_rel.
+    """
+    __tablename__ = table_prefix + "entrycat"
+    __table_args__ = {'autoload': True}
+    # Both columns are declared explicitly: together they form the
+    # primary key and carry the foreign keys to entries and categories.
+    entry_id = sa.Column(
+        "entryid", sa.Integer,
+        sa.ForeignKey(table_prefix + "entries.id"),
+        primary_key = True)
+    category_id = sa.Column(
+        "categoryid", sa.Integer,
+        sa.ForeignKey(table_prefix + "category.categoryid"),
+        primary_key = True)
+
+class EntryProperty(Base):
+    """
+    Entry properties (one row per entry and property name).
+
+    Database columns:
+
+    entryid,
+    property,
+    value
+
+    Interesting values for property (except caches):
+    'meta_description',
+    'meta_keywords'
+    and
+    'ep_SomeThing' for custom attribute SomeThing
+    (I get ep_BodyClass as I used BodyClass for some entries)
+    """
+    __tablename__ = table_prefix + "entryproperties"
+    __table_args__ = {'autoload': True}
+    # Composite primary key declared by hand: (entryid, property)
+    entry_id = sa.Column(
+        "entryid", sa.Integer,
+        sa.ForeignKey(table_prefix + "entries.id"),
+        primary_key = True)
+    property = sa.Column(
+        "property", sa.String,
+        primary_key = True)
+
+class Entry(Base):
+    """
+    Actual entry.
+
+    Properties to be used directly:
+
+    id
+    title
+    body
+    extended
+    isdraft
+
+    author (object with attributes username, realname, email)
+
+    Other properties (including timestamp, author, authorid, 
+    last_modified and mapped from other tables) better should be
+    used by methods.
+    """
+    __tablename__ = table_prefix + "entries"
+    __table_args__ = {'autoload': True}
+
+    authorid = sa.Column("authorid", 
+                         sa.ForeignKey(table_prefix + "authors"))
+    author_nick = sa.Column("author",
+            sa.ForeignKey(table_prefix + 'authors.authorid'))
+    author = orm.relation("Author",
+                          primaryjoin="Entry.authorid == Author.authorid")
+    permalink_rel = orm.relation(
+        "Permalink",
+        primaryjoin="and_"
+        "(Entry.id == Permalink.entry_id, "
+        "Permalink.type == 'entry')",
+        uselist = False)
+    #category_rel = orm.relation(
+    #    "Category",
+    #    primaryjoin="Entry.id == EntryCat.entryid",
+    #    uselist = False)
+    category_rel = orm.relationship(
+        "Category", secondary = EntryCat.__table__, uselist = False,
+        )
+    tag_rel = orm.relation(
+        "EntryTag",
+        primaryjoin="Entry.id == EntryTag.entry_id"
+        )
+    property_rel = orm.relation(
+        "EntryProperty",
+        primaryjoin="Entry.id == EntryProperty.entry_id"
+        )
+
+    def creation_time(self):
+        return datetime.datetime.fromtimestamp(self.timestamp)
+
+    def last_modification_time(self):
+        return datetime.datetime.fromtimestamp(self.last_modified)
+
+    def permalink(self):
+        return self.permalink_rel.permalink
+
+    def category(self):
+        return self.category_rel.category_name
+
+    def tags(self):
+        return [ item.tag for item in self.tag_rel ]
+
+    def properties(self):
+        # I handle it in ugly way outside ORM session, someone
+        # better at SQLAlchemy could surely improve
+        if not hasattr(self, 'properties_dict'):
+            self.properties_dict = dict(
+                (item.property, item.value)
+                for item in  self.property_rel
+                if not item.property.startswith('ep_cache'))
+        return self.properties_dict
+
+    def property(self, name, default = None):
+        return self.properties().get(name, default)
+
+    def meta_description(self):
+        return self.property("meta_description")
+    def meta_keywords(self):
+        return self.property("meta_keywords")
+    def custom_property(self, property_name):
+        return self.property("ep_" + property_name)
+
+class EntryTag(Base):
+    """
+    Entry-Tag connection. 
+
+    entryid
+    tag
+    """
+    __tablename__ = table_prefix + "entrytags"
+    __table_args__ = {'autoload': True}
+    entry_id = sa.Column("entryid", sa.Integer,
+                         sa.ForeignKey(table_prefix + "entries.id"),
+                         primary_key = True)
+    tag = sa.Column("tag", sa.Integer,
+                    primary_key = True)
+
+class StaticPage(Base):
+    """
+    Static pages
+
+    id
+    parent_id
+    articleformat
+    articleformattitle
+    markup
+    pagetitle
+    permalink
+    pre_content
+    content
+    headline
+    filename
+    timestamp
+    last_modified
+    authorid
+    articletype
+    publishstatus
+    language
+    """
+    __tablename__ = table_prefix + "staticpages"
+    __table_args__ = {'autoload': True}
+
+    def creation_time(self):
+        return datetime.datetime.fromtimestamp(self.timestamp)
+
+    def last_modification_time(self):
+        return datetime.datetime.fromtimestamp(self.last_modified)
+
+    def is_draft(self):
+        return self.publishstatus and False or True
+
+
+###########################################################################
+# Database helpers
+###########################################################################
+
+def get_blog_posts():
+    """
+    Yields all blog posts found, including drafts. Use Entry class
+    methods to examine data.
+    """
+    for item in session.query(Entry).order_by(Entry.id):
+        yield item
+
+def get_static_pages():
+    for item in session.query(StaticPage).order_by(StaticPage.id):
+        yield item
+
+def get_config_item(item_name):
+    return session.query(Config).get(item_name).value
+
+###########################################################################
+# Output functions
+###########################################################################
+
+# Marker written between the leading and the extended text of every
+# exported post and page
+MORE_MARKER = "<!--more-->"
+
+def normalize_text(text):
+    """
+    Add any patches for entry text here
+    """
+    return text.replace(u"\r\n", u"\n")
+
+def write_post(output_filename, yaml_data, 
+               leading_text, extended_text):
+
+    full_filename = os.path.join("_posts", output_filename)
+    with codecs.open(full_filename, "w", "utf-8") as out:
+        out.write("---\n")
+        out.write(yaml.safe_dump(yaml_data,
+                                 default_flow_style=False,
+                                 allow_unicode=True).decode("utf-8"))
+        out.write("---\n")
+        out.write(normalize_text(leading_text))
+        out.write(
+            "\n\n" + MORE_MARKER + "\n\n"
+            )
+        out.write(normalize_text(extended_text))
+
+def write_page(output_filename, yaml_data,
+               leading_text, extended_text):
+    full_filename = os.path.join("pages", output_filename)
+    with codecs.open(full_filename, "w", "utf-8") as out:
+        out.write("<!--")
+        out.write(yaml.safe_dump(yaml_data,
+                                 default_flow_style=False,
+                                 allow_unicode=True).decode("utf-8"))
+        out.write("-->\n")
+        out.write(normalize_text(leading_text))
+        out.write(
+            "\n\n" + MORE_MARKER + "\n\n"
+            )
+        out.write(normalize_text(extended_text))
+
+###########################################################################
+# Main
+###########################################################################
+    
+if __name__ == '__main__':
+    for needed_dir in ["_posts", "pages"]:
+        if os.path.exists(needed_dir):
+            print "There's already a %s directory here, "\
+                "I'm not going to overwrite it." % needed_dir
+            sys.exit(1)
+        else:
+            os.mkdir(needed_dir)
+    for created_file in ["_config_extra.py"]:
+        if os.path.exists(created_file):
+            print "There's already a %s file here, "\
+                "I'm not going to overwrite it." % created_file
+            sys.exit(1)
+
+    for entry in get_blog_posts():
+        yaml_data = {
+            # Standard blogofile fields
+            "title": entry.title,
+            "date": entry.creation_time().strftime("%Y/%m/%d %H:%M:%S"),
+            "updated": entry.last_modification_time().strftime("%Y/%m/%d %H:%M:%S"),
+            "categories": entry.category(),
+            "tags": ", ".join(entry.tags()),
+            "permalink": entry.permalink(),
+            "guid": entry.id,
+            "author": entry.author.realname,
+            "draft": entry.isdraft and True or False,
+            # source, yaml, content, filename - reserved
+
+            # Custom fields
+            "author_email": entry.author.email,
+            "meta_description": entry.meta_description(),
+            "meta_keywords": entry.meta_keywords(),
+            }
+        # Custom properties
+        for s9prop, blgprop in CUSTOM_ENTRY_PROPERTIES:
+            value = entry.custom_property(s9prop)
+            if value is not None:
+                yaml_data[blgprop] = value
+
+        # Cleaning empty values
+        for empty_key in [
+            key 
+            for key, value in yaml_data.iteritems()
+            if value is None
+            ]:
+            del yaml_data[empty_key]
+        if not yaml_data["draft"]:
+            del yaml_data["draft"]
+
+        lead_text = entry.body
+        remaining_text = entry.extended
+
+        #print yaml_data
+        post_filename = u"{0}-{1}.{2}".format(
+            str(entry.id).zfill(4),
+            re.sub(r'[/!:?\-,\']', '',
+                   entry.title.strip().lower().replace(' ', '_')),
+            FILENAME_EXTENSION,
+            )
+
+        print "Writing {0} ({1})".format(post_filename, entry.permalink())
+        write_post(post_filename, yaml_data, lead_text, remaining_text)
+
+    for page in get_static_pages():
+        yaml_data = {
+            "title": page.pagetitle,
+            "date": page.creation_time().strftime("%Y/%m/%d %H:%M:%S"),
+            "updated": entry.last_modification_time().strftime("%Y/%m/%d %H:%M:%S"),
+            "permalink": page.permalink,
+            "guid": page.id,
+            "draft": page.is_draft(),
+            }
+
+        lead_text = page.pre_content
+        remaining_text = page.content
+
+        page_filename = re.sub(r'^.*/', '', page.permalink)
+        print "Writing static page {0} {1} {2}".format(page_filename, page.id, page.headline, page.permalink)
+        write_page(page_filename, yaml_data, lead_text, remaining_text)
+
+    print "Writing _config_extra.py"
+    with open("_config_extra.py", "w") as conffile:
+        conffile.write("""
+site.url = "%s"
+
+blog.name = "%s"
+
+blog.description = "%s"
+
+""" % (get_config_item("baseURL"),
+       get_config_item("blogTitle"),      
+       get_config_item("blogDescription")))

serendipity2disqus.py

+# Not yet implemented
+
+_references   (probably trackbacks, not sure)
+_comments