1. faassen
  2. example_blaag


example_blaag / blaag.py

The default branch has multiple heads

# -*- encoding: utf-8 -*-
from string import Template
from datetime import datetime, timedelta
from time import time
import os, os.path, shutil, re
from docutils.core import publish_parts
from hgapi import hgapi
import PyRSS2Gen
from ConfigParser import SafeConfigParser
import codecs

def get_configuration(path):
    """Read the blaag config file at *path* and return a SafeConfigParser.

    Every known option is pre-seeded with a default so callers can
    config.get() it even when the file omits the option.
    """
    defaults = {
        'content': {
            'title': u"Unknown title",
            'author': u"Unknown author",
            'description': u"Unknown description",
            # NOTE(review): generate_blaag reads 'entries_path', but the
            # default here is named 'entries' -- looks like a mismatch;
            # confirm against a real blaag.cfg.
            'entries': 'entries',
            },
        'html': {
            'templates_path': 'templates',
            'html_path': 'html',
            },
        'disqus': {
            'name': None,
            },
        }
    config = SafeConfigParser()
    # Seed defaults section by section so config.get(section, option)
    # falls back to them.  SafeConfigParser.set() rejects non-string
    # values, so a None default is stored as '' -- presumably meaning
    # "no disqus account configured"; confirm the templates agree.
    for section, options in defaults.items():
        config.add_section(section)
        for option, value in options.items():
            config.set(section, option, value if value is not None else '')
    f = codecs.open(path, 'r', 'utf-8') # make sure config is read as unicode
    try:
        config.readfp(f)  # the original opened the file but never read it
    finally:
        f.close()
    return config

# Module-level configuration, loaded once at import time from the current
# working directory.  NOTE(review): importing this module therefore fails
# when blaag.cfg is absent -- confirm that is intended.
config = get_configuration('blaag.cfg')

TEMPLATES_PATH = config.get('html', 'templates_path')  # directory holding the *.html templates
BASE = config.get('html', 'base_url')  # site base URL used when building entry links

def make_template(name, templates_path=None):
    """Load a template file and return it as a ``string.Template``.

    :param name: filename of the template inside the templates directory
    :param templates_path: directory to load from; defaults to the
        configured ``TEMPLATES_PATH`` (parameter added for testability,
        backward compatible)
    :returns: ``Template`` built from the file's UTF-8 decoded contents
    """
    if templates_path is None:
        templates_path = TEMPLATES_PATH
    # codecs.open decodes to unicode for us, and the with-block closes
    # the handle (the original leaked the open file).
    with codecs.open(os.path.join(templates_path, name), 'r', 'utf-8') as f:
        return Template(f.read())

# Load every template once at import time.
page_tpl = make_template('pagetemplate.html')        # outer page shell
blaag_tpl = make_template('blaag_template.html')     # a single blog entry
comments_full_tpl = make_template('comments_full.html')   # full comment thread (post page)
comments_short_tpl = make_template('comments_short.html') # comment teaser (front page)
menu_link_tpl = make_template('menu_link.html')      # one entry in the sidebar menu
timestamp_tpl = make_template('timestamp.html')      # wrapper around created/modified stamps
created_tpl = make_template('created.html')          # "created at" fragment
modified_tpl = make_template('modified.html')        # "modified at" fragment

def sieve(*words):
    """Build a filter predicate for RSS generation.

    The returned callable takes a docutils parts dict and reports
    whether any of *words* occurs in its (already lower-cased) title
    or body.
    """
    def sieve_impl(parts):
        title = parts['title'].lower()
        body = parts['body'].lower()
        return any(word in title or word in body for word in words)
    return sieve_impl

    'rss.xml': lambda x: True,
    'python.xml': sieve('python')

def generate_rss(data, sieve, title, description, base_url):
    """Generate the rss feed, from a list of docutils parts dicts.

    Depends on that the parts['created'] contains UTC.  Only entries
    accepted by the *sieve* predicate are included.

    :returns: a ``PyRSS2Gen.RSS2`` object ready to be written out
    """
    items = []
    for parts in data:
        if sieve(parts):
            # Restored: the items.append(PyRSS2Gen.RSSItem( line was
            # lost in the merge, leaving the kwargs orphaned.
            items.append(PyRSS2Gen.RSSItem(
                    title = parts['title'],
                    link = url(parts),
                    description = parts['html_body'],
                    guid = PyRSS2Gen.Guid(url(parts)),
                    pubDate = datetime.fromtimestamp(parts['created'])))

    return PyRSS2Gen.RSS2(
        title = title,
        link = base_url,
        description = description,
        lastBuildDate = datetime.utcnow(),
        items = items)

def get_data(entry):
    """Run *entry* (an rst source string) through docutils and return
    the resulting parts dict."""
    parts = publish_parts(entry, writer_name="html")
    return parts

def get_entries(entries_path):
    """Return the filenames of all ``.rst`` entries in *entries_path*,
    sorted with ``reverse=True`` (newest first for date/number-prefixed
    filenames)."""
    entries = [x for x in os.listdir(entries_path) if x.endswith(".rst")]
    # The docstring promised a reverse sort but the code never did it,
    # so entry order depended on the filesystem.  Restored.
    entries.sort(reverse=True)
    return entries

def get_data_entries(entries, entries_path):
    """Read and parse entries, return document parts as generated by
    docutils, augmented with 'created'/'modified' timestamps from hg."""
    data = []
    for entry in entries:
        with open(os.path.join(entries_path, entry)) as source:
            parts = get_data(source.read().decode("utf-8"))
            parts.update(get_entry_dates(entry, entries_path))
            # Restored: without this append the function always
            # returned an empty list.
            data.append(parts)
    return data

def identifier(parts):
    """Create an identifier (URL slug) from the doc title, replacing
    each special char with a dash (-) and stripping edge dashes."""
    # r"\W" == "[^\w]" but as a raw string, avoiding the invalid
    # escape sequence warning on modern Pythons.
    return re.sub(r"\W", "-", parts["title"]).strip("-")
def url(parts):
    """Absolute URL of a document: site base plus its title slug."""
    slug = identifier(parts)
    return "%s%s" % (BASE, slug)

def setup_target_folder(html_path, css_path, css_name):
    """Create target folder if it does not exist, and copy CSS files there"""
    if not os.path.exists(html_path):
        # Restored: the guard had an empty body after the merge.
        os.makedirs(html_path)
    shutil.copy(os.path.join(css_path, css_name), html_path)
    # docutils' base stylesheet must sit next to the custom one.
    shutil.copy(os.path.join(css_path, "html4css1.css"), html_path)

def hgdate_to_utc(hgdate_string):
    """Convert a hgdate format string to unix time UTC"""
    # hgdate is "<local unix time> <utc offset>"; summing the two
    # whitespace-separated fields yields the UTC timestamp.
    total = 0
    for field in hgdate_string.split():
        total += int(field)
    return total

def get_entry_dates(entry, entries_path):
    """Get (creation, last modification) as unix time UTC.

    Asks Mercurial for the full hgdate log of *entry*; an entry not
    yet under version control gets "now" for both stamps.
    """
    repo = hgapi.Repo(entries_path)
    times = repo.hg_log(template='{date|hgdate}\n', filespec=entry).split("\n")
    if times == ['']: #not in repo yet
        print("Warning: %s is not version controlled" % (entry,))
        created = modified = int(time())
    else:
        # Restored: this else: was lost in the merge, leaving the
        # branch below dangling.
        if not times[-1]: del times[-1]  # drop trailing empty line
        created = hgdate_to_utc(times[-1])   # oldest changeset
        modified = hgdate_to_utc(times[0])   # newest changeset
    return {'created': created, 'modified': modified}

def get_timestamp(created, modified):
    """Return html for the timestamp portion of a post"""
    created_dt = datetime.fromtimestamp(created)
    modified_dt = datetime.fromtimestamp(modified)
    ts_str = created_tpl.substitute(timestamp=created_dt.strftime("%y%m%d %H:%M"))
    #Add modification date if last change is more than 10 minutes after creation
    if modified_dt - created_dt > timedelta(minutes=10):
        ts_str += modified_tpl.substitute(timestamp=modified_dt.strftime("%y%m%d %H:%M"))
    return timestamp_tpl.substitute(timestamps = ts_str)

def generate_blaag():
    """Generate html from blaag entries.

    NOTE(review): large parts of this body were lost in a bad merge
    ("multiple heads") and have been reconstructed from the surviving
    fragments -- confirm against repository history.
    """
    html_path = config.get('html', 'html_path')

    setup_target_folder(html_path,
        css_path=config.get('html', 'css_path'),
        css_name=config.get('html', 'css_name'))

    entries_path = config.get('content', 'entries_path')
    entries = get_entries(entries_path)
    data = get_data_entries(entries, entries_path)
    main = []

    #Get timestamps from Mercurial
    for parts in data:
        parts.update({'timestamp': get_timestamp(parts['created'], parts['modified']),
                      'identifier': identifier(parts)})
    #Sort by creation date
    data.sort(cmp=lambda x, y: y['created']-x['created'])
    #Create links
    links = "\n".join([menu_link_tpl.substitute(link=url(parts),
                                                title=parts['title'])
                       for parts in data])

    title = config.get('content', 'title')

    # Values every page template needs regardless of entry.
    defaults = {
        'disqus_name': config.get('disqus', 'name'),
        'css': config.get('html', 'css_url') + "?" + str(int(time())), #force reload. 
        'page_title': title,
        'blaag_title': title,
        'author': config.get('content', 'author'),
        }

    for parts in data: 
        # NOTE(review): the page templates presumably need the shared
        # keys (css, disqus_name, ...) too -- confirm.
        parts.update(defaults)
        htmltitle = identifier(parts)
        target_path = os.path.join(html_path, htmltitle)
        if not os.path.exists(target_path):
            os.makedirs(target_path)

        with open(os.path.join(html_path, htmltitle, "index.html"), "w") as target:
            #Write page for post
            fullcomments = comments_full_tpl.substitute(parts, threadid=htmltitle)
            blaag_html = blaag_tpl.substitute(parts, comments=fullcomments)
            target.write(page_tpl.substitute(parts, title=parts['title'], 
                                             links=links, main=blaag_html).encode("utf-8"))
            #Append post to main page
            shortcomments = comments_short_tpl.substitute(parts, 
                                                          threadid=htmltitle)
            main.append(blaag_tpl.substitute(parts, comments=shortcomments))
    #Write main page
    with open(os.path.join(html_path, "index.html"), "w") as target:
        target.write(page_tpl.substitute(defaults, title=title, links=links, 
                                         main="\n".join(main)).encode("utf-8"))
    #Write rss
    description = config.get('content', 'description')
    base_url = config.get('html', 'base_url')
    for name in FEEDS:        
        rss = generate_rss(data, FEEDS[name], title, description, base_url)
        with open(os.path.join(html_path, name), 'w') as f:
            rss.write_xml(f)
def pretxncommit_hook(*args, **kwargs):
    """Run before commit"""
    # NOTE(review): the body appears to have been lost in the merge --
    # presumably it should regenerate the site (generate_blaag()) before
    # each commit; confirm against repository history.

if __name__ == "__main__":