Source

example_blaag / blaag.py

The default branch has multiple heads

Full commit
# -*- encoding: utf-8 -*-
from string import Template
from datetime import datetime, timedelta
from time import time
import os, os.path, shutil, re
from docutils.core import publish_parts
from hgapi import hgapi
import PyRSS2Gen
from ConfigParser import SafeConfigParser
import codecs

def get_configuration(path):
    """Read the blaag configuration file at path and return the parser.

    The file is decoded as UTF-8 so that option values come back as
    unicode. The built-in fallbacks below are seeded section by section
    before the file is read, so any value present in the file overrides
    its fallback.

    Options whose fallback is None have no usable default: they are left
    unset, and config.get() raises NoOptionError when the file omits them.
    """
    defaults = {
        'content': {
            'title': u"Unknown title",
            'author': u"Unknown author",
            'description': u"Unknown description",
            # Named after the 'entries_path' option the module reads
            'entries_path': 'entries',
            },
        'html': {
            'css_name': None,
            'css_url': None,
            'css_path': None,
            'templates_path': 'templates',
            'html_path': 'html',
            'base_url': None,
            },
        'disqus': {
            'name': None,
            }
        }
    # The parser's constructor argument is a flat option->value mapping
    # shared by all sections, so the nested dict cannot be passed there;
    # the fallbacks are written into their own sections instead.
    config = SafeConfigParser()
    for section, options in defaults.items():
        config.add_section(section)
        for option, value in options.items():
            if value is not None:
                config.set(section, option, value)
    # codecs.open makes sure the config is read as unicode; the with block
    # closes the file even when parsing fails.
    with codecs.open(path, 'r', 'utf-8') as f:
        config.readfp(f)
    return config

# Settings are loaded once, at import time, from blaag.cfg in the current
# working directory.
config = get_configuration('blaag.cfg')

# [content]: blog metadata and the folder that holds the .rst entries
TITLE = config.get('content', 'title')
AUTHOR = config.get('content', 'author')
DESCRIPTION = config.get('content', 'description')
ENTRIES_PATH = config.get('content', 'entries_path')

# [html]: stylesheet, template and output locations, plus the prefix that
# is put in front of every post identifier to build its url
CSS_NAME = config.get('html', 'css_name')
CSS_URL = config.get('html', 'css_url')
CSS_PATH = config.get('html', 'css_path')
TEMPLATES_PATH = config.get('html', 'templates_path')
HTML_PATH = config.get('html', 'html_path')
BASE = config.get('html', 'base_url')

# [disqus]: passed to the templates as $disqus_name
DISQUS_NAME = config.get('disqus', 'name')

# Substitution values merged into every post and used for the front page
defaults = {'disqus_name': DISQUS_NAME, 
            'css': CSS_URL + "?" + str(int(time())), # current unix time as query string, to force a reload
            'page_title': TITLE,
            'blaag_title': TITLE,
            'author': AUTHOR}

def make_template(name):
    """Load the file name from TEMPLATES_PATH as a string.Template.

    The file content is decoded as UTF-8. The with block closes the file
    even when reading or decoding fails.
    """
    with open(os.path.join(TEMPLATES_PATH, name), 'r') as f:
        return Template(f.read().decode('utf-8'))

# All templates are loaded once, at import time, from TEMPLATES_PATH.
# page_tpl wraps a whole html page; blaag_tpl renders a single post.
page_tpl = make_template('pagetemplate.html')
blaag_tpl = make_template('blaag_template.html')
# Comment sections: full on a post's own page, short on the front page
comments_full_tpl = make_template('comments_full.html')
comments_short_tpl = make_template('comments_short.html')
# One menu entry linking to a post
menu_link_tpl = make_template('menu_link.html')
# timestamp_tpl wraps the created (and optionally modified) snippets
timestamp_tpl = make_template('timestamp.html')
created_tpl = make_template('created.html')
modified_tpl = make_template('modified.html')

def sieve(*words):
    """Build a filter for RSS generation.

    The returned function takes a docutils parts dict and returns True
    when any of words occurs in its 'title' or 'body', ignoring case.
    Called without words, the filter rejects everything.
    """
    # Title and body are compared in lower case, so the words have to be
    # lowered as well or a capitalised word could never match.
    needles = tuple(word.lower() for word in words)

    def sieve_impl(parts):
        if not needles:
            return False
        haystacks = (parts['title'].lower(), parts['body'].lower())
        return any(needle in text
                   for needle in needles
                   for text in haystacks)
    return sieve_impl


# Feeds to generate: output file name -> filter deciding which posts go in.
# rss.xml takes every post, python.xml only posts mentioning 'python'.
FEEDS = {
    'rss.xml': lambda x: True,
    'python.xml': sieve('python')
    }

def generate_rss(data, sieve, name):
    """Write the rss feed name into HTML_PATH.

    data is a list of docutils parts dicts that also carry 'created';
    only the entries for which sieve(parts) is true end up in the feed.
    Depends on parts['created'] containing UTC.
    """
    items = []
    for parts in data:
        if not sieve(parts):
            continue
        link = url(parts)
        items.append(PyRSS2Gen.RSSItem(
                title = parts['title'],
                link = link,
                description = parts['html_body'],
                guid = PyRSS2Gen.Guid(link),
                # NOTE(review): fromtimestamp converts using the local
                # timezone; kept as is because the result depends on how
                # 'created' is produced - confirm before changing either.
                pubDate = datetime.fromtimestamp(parts['created'])))

    rss = PyRSS2Gen.RSS2(
        title = TITLE,
        link = BASE,
        description = DESCRIPTION,
        lastBuildDate = datetime.utcnow(),
        items = items)

    # The with block makes sure the feed is flushed and the file closed
    with open(os.path.join(HTML_PATH, name), "w") as target:
        rss.write_xml(target)
    
def get_data(entry):
    """Render the rst string entry with the docutils html writer.

    Returns the parts dict produced by publish_parts.
    """
    parts = publish_parts(entry, writer_name="html")
    return parts

def get_entries():
    """List the .rst file names found in ENTRIES_PATH, in reverse sorted order"""
    rst_names = (name for name in os.listdir(ENTRIES_PATH)
                 if name.endswith(".rst"))
    return sorted(rst_names, reverse=True)

def get_data_entries(entries):
    """Load every entry file and return the list of its docutils parts.

    Each file name in entries is read from ENTRIES_PATH, decoded as UTF-8
    and parsed; the 'created' and 'modified' dates of the entry are merged
    into its parts dict.
    """
    data = []
    for filename in entries:
        with open(os.path.join(ENTRIES_PATH, filename)) as source:
            raw = source.read()
        parts = get_data(raw.decode("utf-8"))
        parts.update(get_entry_dates(filename))
        data.append(parts)
    return data

def identifier(parts):
    """Create an identifier from the doc title.

    Every character of parts["title"] that is not a word character is
    replaced with a dash (-), then leading and trailing dashes are
    removed. Runs of special characters are not collapsed.
    """
    # Raw string: "\w" is a regex class, not a string escape
    return re.sub(r"[^\w]", "-", parts["title"]).strip("-")
    
def url(parts):
    """Return the url of a document: BASE followed by its identifier"""
    slug = identifier(parts)
    return BASE + slug

def setup_target_folder():
    """Make sure HTML_PATH exists and holds the stylesheets.

    The folder is created when missing; the configured stylesheet and
    html4css1.css are then copied into it from CSS_PATH.
    """
    if not os.path.exists(HTML_PATH):
        os.mkdir(HTML_PATH)
    for stylesheet in (CSS_NAME, "html4css1.css"):
        shutil.copy(os.path.join(CSS_PATH, stylesheet), HTML_PATH)

def hgdate_to_utc(hgdate_string):
    """Collapse a hgdate format string into a single integer.

    The whitespace separated fields of hgdate_string are parsed as
    integers and added together; callers use the result as unix time.
    """
    # NOTE(review): Mercurial documents hgdate as "<unix time> <offset>";
    # confirm that adding the offset is the intended conversion.
    return sum(map(int, hgdate_string.split()))

def get_entry_dates(entry):
    """Return {'created': ..., 'modified': ...} for the entry file.

    The dates come from the Mercurial log of entry in the ENTRIES_PATH
    repository, one hgdate per line. The first line is used as the last
    modification and the last line as the creation. An entry without any
    log output gets the current time for both, and a warning is printed.
    """
    repo = hgapi.Repo(ENTRIES_PATH)
    log_output = repo.hg_log(template='{date|hgdate}\n', filespec=entry)
    stamps = log_output.split("\n")

    if stamps == ['']: #not in repo yet
        print("Warning: %s is not version controlled" % (entry,))
        now = int(time())
        return {'created': now, 'modified': now}

    # The template ends every date with a newline, which leaves one empty
    # trailing element after the split
    if not stamps[-1]:
        stamps.pop()
    return {'created': hgdate_to_utc(stamps[-1]),
            'modified': hgdate_to_utc(stamps[0])}

def get_timestamp(created, modified):
    """Return html for the timestamp portion of a post.

    created and modified are unix timestamps. The creation time is always
    rendered; the modification time is added only when the last change is
    more than 10 minutes after creation.
    """
    # The tuple is parenthesized so the comprehension is valid on Python 3 too
    cr, mod = [datetime.fromtimestamp(ts) for ts in (created, modified)]
    ts_str = created_tpl.substitute(timestamp=cr.strftime("%y%m%d %H:%M"))
    if mod - cr > timedelta(minutes=10):
        ts_str += modified_tpl.substitute(timestamp=mod.strftime("%y%m%d %H:%M"))
    return timestamp_tpl.substitute(timestamps = ts_str)

def generate_blaag(entries):
    """Generate html and rss from blaag entries.

    entries is a list of entry file names. The target folder is prepared,
    every entry is rendered, and then one <identifier>/index.html per
    post, the front page index.html holding all posts, and every feed in
    FEEDS are written below HTML_PATH.
    """
    setup_target_folder()
    data = get_data_entries(entries)

    #Attach the rendered timestamp and the identifier to every post
    for parts in data:
        parts.update({'timestamp': get_timestamp(parts['created'], parts['modified']),
                      'identifier': identifier(parts)})
    #Sort by creation date, newest first. key/reverse gives a stable
    #descending sort and does not rely on the Python 2 only cmp argument
    data.sort(key=lambda parts: parts['created'], reverse=True)
    #Create links
    links = "\n".join([menu_link_tpl.substitute(link=url(parts),
                                                title=parts['title'])
                       for parts in data])

    main = []
    for parts in data:
        parts.update(defaults)
        htmltitle = identifier(parts)
        link = url(parts)
        target_path = os.path.join(HTML_PATH, htmltitle)
        if not os.path.exists(target_path):
            os.mkdir(target_path)

        #Render the page for the post before opening the file, so a
        #template error does not leave a truncated index.html behind
        fullcomments = comments_full_tpl.substitute(parts, threadid=htmltitle)
        blaag_html = blaag_tpl.substitute(parts, comments=fullcomments)
        page = page_tpl.substitute(parts, title=parts['title'],
                                   links=links, main=blaag_html)
        with open(os.path.join(target_path, "index.html"), "w") as target:
            target.write(page.encode("utf-8"))

        #Append post to main page
        shortcomments = comments_short_tpl.substitute(parts,
                                                      link=link,
                                                      threadid=htmltitle)
        main.append(blaag_tpl.substitute(parts,
                                         comments=shortcomments,
                                         link=link))
    #Write main page
    front_page = page_tpl.substitute(defaults, links=links,
                                     main="\n".join(main))
    with open(os.path.join(HTML_PATH, "index.html"), "w") as target:
        target.write(front_page.encode("utf-8"))
    #Write rss
    for name in FEEDS:
        generate_rss(data, FEEDS[name], name)

def pretxncommit_hook(*args, **kwargs):
    """Regenerate the whole blaag; meant to run before commit.

    All arguments are accepted and ignored.
    """
    entries = get_entries()
    generate_blaag(entries)

# Running the module as a script regenerates the site from all entries
if __name__ == "__main__":
    generate_blaag(get_entries())