Source

example_blaag / blaag.py

The default branch has multiple heads

Full commit
# -*- encoding: utf-8 -*-
from string import Template
from datetime import datetime, timedelta
from time import time
import os, os.path, shutil, re
from docutils.core import publish_parts
from hgapi import hgapi
import PyRSS2Gen

# --- Stylesheet ---------------------------------------------------------
# CSS_NAME is the file copied from CSS_PATH into the output folder;
# CSS_URL is the address the generated pages use to reference it.
CSS_NAME = 'haze.css'
CSS_URL = '/haze.css'
CSS_PATH = 'css'

# --- Folders ------------------------------------------------------------
# Source entries (.rst) are read from ENTRIES_PATH, html is written to
# HTML_PATH.
ENTRIES_PATH = 'entries'
HTML_PATH = 'html'

# --- Site identity ------------------------------------------------------
# BASE is prepended to a post identifier to build its public url.
BASE = 'http://blaag.haard.se/'
TITLE = u"Fredrik Håård's Blaag"
DESCRIPTION = u'YAB'
DISQUS_NAME = 'devrantblaag'
AUTHOR = u'Fredrik Håård'

# Values handed to every template substitution. The css url carries the
# current unix time as a query string so that browsers reload the
# stylesheet after each generation run.
defaults = dict(
    disqus_name=DISQUS_NAME,
    css='%s?%d' % (CSS_URL, int(time())),
    page_title=TITLE,
    blaag_title=TITLE,
    author=AUTHOR,
)

def _load_template(name):
    """Read templates/<name>, decode it as UTF-8 and return a string.Template.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the content has been read instead of being left to the garbage
    collector.
    """
    with open("templates/" + name) as source:
        return Template(source.read().decode("utf-8"))


# All templates are loaded once, at import time, relative to the current
# working directory.
page_tpl = _load_template("pagetemplate.html")
blaag_tpl = _load_template("blaag_template.html")
comments_full_tpl = _load_template("comments_full.html")
comments_short_tpl = _load_template("comments_short.html")
menu_link_tpl = _load_template("menu_link.html")
timestamp_tpl = _load_template("timestamp.html")
created_tpl = _load_template("created.html")
modified_tpl = _load_template("modified.html")

def sieve(*words):
    """Build a filter predicate for RSS generation.

    words -- any number of search words.

    Returns a function taking a parts dict (with 'title' and 'body'
    strings) that returns True when at least one of the words occurs in
    the title or the body, ignoring case, and False otherwise (including
    when no words were given).
    """
    # The text is compared lower-cased, so the words must be lower-cased
    # as well; otherwise a word such as 'Python' could never match.
    needles = [word.lower() for word in words]

    def sieve_impl(parts):
        if not needles:
            return False
        # Lower-case each text once instead of once per word.
        title = parts['title'].lower()
        body = parts['body'].lower()
        return any(needle in title or needle in body for needle in needles)
    return sieve_impl


# Maps the file name of each feed to the predicate that selects its posts.
# Every predicate is called with a parts dict and returns a truth value.
FEEDS = dict([
    ('rss.xml', lambda x: True),      # all posts
    ('python.xml', sieve('python')),  # posts whose title or body match 'python'
])

def generate_rss(data, sieve, name):
    """Generate one rss feed file from a list of docutils parts dicts.

    data  -- list of parts dicts; 'title', 'html_body' and 'created'
             (a unix timestamp) are read from each of them.
    sieve -- predicate called with a parts dict; only posts for which it
             returns a true value end up in the feed.
    name  -- file name of the feed, created inside HTML_PATH.

    Depends on that the parts['created'] contains UTC.
    """
    # NOTE(review): pubDate is built with fromtimestamp() (local wall-clock
    # time) while lastBuildDate uses utcnow(). Whether that is right depends
    # on what hgdate_to_utc() actually produces and on what PyRSS2Gen
    # expects -- confirm both before changing either side.
    items = [PyRSS2Gen.RSSItem(
                 title = parts['title'],
                 link = url(parts),
                 description = parts['html_body'],
                 guid = PyRSS2Gen.Guid(url(parts)),
                 pubDate = datetime.fromtimestamp(parts['created']))
             for parts in data if sieve(parts)]

    rss = PyRSS2Gen.RSS2(
        title = TITLE,
        link = BASE,
        description = DESCRIPTION,
        lastBuildDate = datetime.utcnow(),
        items = items)

    # Close the feed file deterministically once it has been written.
    with open(os.path.join(HTML_PATH, name), "w") as target:
        rss.write_xml(target)
    
def get_data(entry):
    """Run the rst string entry through docutils' "html" writer and
    return the parts dict that publish_parts produces"""
    html_parts = publish_parts(entry, writer_name="html")
    return html_parts

def get_entries():
    """Return the names of all .rst files in ENTRIES_PATH as a list,
    sorted in descending (reverse) order"""
    rst_names = (name for name in os.listdir(ENTRIES_PATH)
                 if name.endswith(".rst"))
    return sorted(rst_names, reverse=True)

def get_data_entries(entries):
    """Read and parse each entry file, returning a list with one parts dict
    per entry (same order as entries). Each dict holds the docutils parts
    plus whatever get_entry_dates returns for that entry."""
    def load(filename):
        # Entry files are read as bytes and decoded as UTF-8 before parsing.
        with open(os.path.join(ENTRIES_PATH, filename)) as handle:
            document = get_data(handle.read().decode("utf-8"))
            document.update(get_entry_dates(filename))
        return document

    return [load(filename) for filename in entries]

def identifier(parts):
    """Build an identifier from parts["title"]: every character that is not
    a word character becomes a dash (-), then leading and trailing dashes
    are removed"""
    dashed = re.sub(r"[^\w]", "-", parts["title"])
    return dashed.strip("-")
    
def url(parts):
    """Return the url of a document: BASE followed by its identifier"""
    slug = identifier(parts)
    return BASE + slug

def setup_target_folder():
    """Make sure HTML_PATH exists, then copy the stylesheets from CSS_PATH
    into it"""
    if not os.path.exists(HTML_PATH):
        os.mkdir(HTML_PATH)
    # The site stylesheet first, then the html4css1.css stylesheet.
    for stylesheet in (CSS_NAME, "html4css1.css"):
        shutil.copy(os.path.join(CSS_PATH, stylesheet), HTML_PATH)

def hgdate_to_utc(hgdate_string):
    """Return the sum of the whitespace-separated integers in hgdate_string.

    Used on the output of the {date|hgdate} log template to get a single
    unix-time value. An empty string gives 0.

    NOTE(review): Mercurial describes hgdate as "unix timestamp, timezone
    offset". If the first number is already epoch seconds, adding the
    offset shifts the result -- confirm against the consumers of this
    value before relying on it being UTC.
    """
    return sum(map(int, hgdate_string.split()))

def get_entry_dates(entry):
    """Return {'created': ..., 'modified': ...} for an entry file.

    Both values are unix times computed by hgdate_to_utc from the hg log
    of the entry: the last log line gives 'created' and the first gives
    'modified'. An entry with no log output gets the current time for
    both, and a warning is printed.
    """
    repo = hgapi.Repo(ENTRIES_PATH)
    log_output = repo.hg_log(template='{date|hgdate}\n', filespec=entry)
    stamps = log_output.split("\n")

    if stamps == ['']: #not in repo yet
        print("Warning: %s is not version controlled" % (entry,))
        now = int(time())
        return {'created': now, 'modified': now}

    # The template ends every line with a newline, which leaves an empty
    # last element after splitting.
    if not stamps[-1]:
        stamps.pop()
    return {'created': hgdate_to_utc(stamps[-1]),
            'modified': hgdate_to_utc(stamps[0])}

def get_timestamp(created, modified):
    """Return html for the timestamp portion of a post.

    created, modified -- unix timestamps; both are converted with
                         datetime.fromtimestamp and formatted as
                         "yymmdd HH:MM".

    The modification time is only included when it lies more than ten
    minutes after the creation time.
    """
    stamp_format = "%y%m%d %H:%M"
    # The tuple is parenthesised: the bare "for ts in a, b" form inside a
    # list comprehension is only accepted by Python 2.
    cr, mod = [datetime.fromtimestamp(ts) for ts in (created, modified)]
    ts_str = created_tpl.substitute(timestamp=cr.strftime(stamp_format))
    #Add modification date if last change is more than 10 minutes after creation
    if mod - cr > timedelta(minutes=10):
        ts_str += modified_tpl.substitute(timestamp=mod.strftime(stamp_format))
    return timestamp_tpl.substitute(timestamps = ts_str)

def generate_blaag(entries):
    """Generate html from blaag entries.

    entries -- list of entry file names inside ENTRIES_PATH.

    Writes below HTML_PATH: one IDENTIFIER/index.html page per post, an
    index.html holding all posts (newest first), and every feed listed
    in FEEDS. Returns None.
    """
    setup_target_folder()
    data = get_data_entries(entries)
    main = []

    #Add the timestamp html and the url identifier to every post
    for parts in data:
        parts.update({'timestamp': get_timestamp(parts['created'], parts['modified']),
                      'identifier': identifier(parts)})
    #Sort by creation date, newest first. key/reverse yields the same stable
    #order as a cmp= comparator would, and is also accepted by Python 3.
    data.sort(key=lambda parts: parts['created'], reverse=True)
    #Create links
    links = "\n".join([menu_link_tpl.substitute(link=url(parts),
                                                title=parts['title'])
                       for parts in data])

    for parts in data:
        parts.update(defaults)
        #The title is not touched by the update above, so identifier and
        #url only need to be computed once per post
        htmltitle = identifier(parts)
        post_url = url(parts)
        target_path = os.path.join(HTML_PATH, htmltitle)
        if not os.path.exists(target_path):
            os.mkdir(target_path)

        with open(os.path.join(target_path, "index.html"), "w") as target:
            #Write page for post
            fullcomments = comments_full_tpl.substitute(parts, threadid=htmltitle)
            blaag_html = blaag_tpl.substitute(parts, comments=fullcomments)
            target.write(page_tpl.substitute(parts, title=parts['title'],
                                             links=links, main=blaag_html).encode("utf-8"))
        #Append post to main page
        shortcomments = comments_short_tpl.substitute(parts,
                                                      link=post_url,
                                                      threadid=htmltitle)
        main.append(blaag_tpl.substitute(parts,
                                         comments=shortcomments,
                                         link=post_url))
    #Write main page
    with open(os.path.join(HTML_PATH, "index.html"), "w") as target:
        target.write(page_tpl.substitute(defaults, links=links,
                                         main="\n".join(main)).encode("utf-8"))
    #Write rss
    for name in FEEDS:
        generate_rss(data, FEEDS[name], name)

def pretxncommit_hook(*args, **kwargs):
    """Regenerate the blaag from all current entries.

    Meant to run before a commit; any arguments are accepted and ignored.
    """
    entry_names = get_entries()
    generate_blaag(entry_names)

# When run as a script: generate the blaag from every entry found by
# get_entries().
if __name__ == "__main__":
    generate_blaag(get_entries())