Source

blaag / blaag.py

Full commit
# -*- encoding: utf-8 -*-
from datetime import datetime, timedelta
from time import time, mktime
import os, os.path, shutil, re
from docutils.core import publish_parts
from hgapi import hgapi
import PyRSS2Gen
from ConfigParser import SafeConfigParser, NoSectionError, NoOptionError
import codecs
import argparse
from genshi.template import TemplateLoader
from genshi.core import Markup

class Config(object):
    """Configuration file reader with per-section fallback values.

    Values are looked up in the parsed file first. When the file lacks the
    requested section or option, the value stored under
    ``defaults[section][option]`` is returned instead.
    """

    def __init__(self, path, defaults):
        """Parse the file at *path* and remember *defaults*.

        path     -- configuration file, decoded as UTF-8
        defaults -- dict of the form ``{section: {option: value}}``
        """
        self.config_parser = SafeConfigParser()
        # Decode explicitly so the config is read as unicode. The context
        # manager closes the handle even if parsing raises.
        with codecs.open(path, 'r', 'utf-8') as f:
            self.config_parser.readfp(f)
        self.defaults = defaults

    def _get_default(self, getfunc, section, option):
        """Return ``getfunc(section, option)``, falling back to the defaults.

        Raises NoSectionError when neither the file nor the defaults know
        the section, and NoOptionError when the section is known but the
        option is not. Default values are returned exactly as stored; they
        are not passed through *getfunc*, so no type conversion is applied.
        """
        try:
            return getfunc(section, option)
        except (NoSectionError, NoOptionError):
            d = self.defaults.get(section, None)
            if d is None:
                raise NoSectionError(section)
            try:
                return d[option]
            except KeyError:
                raise NoOptionError(option, section)

    def get(self, section, option):
        """Return the option from the file, or its default."""
        return self._get_default(self.config_parser.get, section, option)

    def getboolean(self, section, option):
        """Return the option parsed as a boolean, or its stored default."""
        return self._get_default(self.config_parser.getboolean, section, option)
            
def get_configuration(path):
    """Load the configuration file at *path* with blaag's built-in defaults.

    Returns a Config whose lookups fall back to the values below whenever
    the file does not provide the requested section or option.
    """
    defaults = {
        'content': {
            'title': u"Unknown title",
            'author': u"Unknown author",
            'description': u"Unknown description",
            # The generator looks up 'entries_path'; without a default
            # under that exact name a config omitting it raised
            # NoOptionError. 'entries' is kept for backward compatibility.
            'entries_path': 'entries',
            'entries': 'entries',
            'custom_path': 'custom'
            },
        'datetime': {
            'datetime_format': '%y%m%d %H:%M',
            'use_filename_for_creation_time': False,
            },
        'html': {
            'html_path': 'html',
            'base_url': 'http://localhost:8000/',
            'test_base_url': 'http://localhost:8000/',
            },
        'disqus': {
            'name': None,
            },
        'google_analytics': {
            'account': None,
            }
        }

    return Config(path, defaults)

def sieve(*words):
    """Build a feed filter that accepts entries mentioning any of *words*.

    The returned function takes a parts dict and returns True when at least
    one word occurs as a substring of parts['title'] or parts['body'].
    Matching is case-insensitive. With no words the filter rejects
    everything.
    """
    # Title and body are lower-cased before searching, so the keywords must
    # be lower-cased too; otherwise a keyword like 'Python' never matches.
    needles = [word.lower() for word in words]

    def sieve_impl(parts):
        if not needles:
            return False
        title = parts['title'].lower()
        if any(needle in title for needle in needles):
            return True
        body = parts['body'].lower()
        return any(needle in body for needle in needles)
    return sieve_impl


# Feeds to generate, keyed by output file name. Each value is a predicate
# that receives an entry's parts dict and returns True when the entry
# belongs in that feed.
FEEDS = {
    'rss.xml': lambda parts: True,   # every entry
    'python.xml': sieve('python'),   # entries mentioning "python"
}

def generate_rss(data, sieve, title, description, base_url,
                 get_url):
    """Generate the rss feed from a list of docutils parts dicts.

    data        -- iterable of parts dicts; each must provide 'title',
                   'html_body' and 'created' (unix time, UTC)
    sieve       -- predicate called with a parts dict; only entries for
                   which it returns a true value are included
    title       -- channel title
    description -- channel description
    base_url    -- channel link
    get_url     -- callable mapping a parts dict to the entry's URL, used
                   both as the item link and as its guid

    Returns a PyRSS2Gen.RSS2 object.
    """
    items = []
    for parts in data:
        if not sieve(parts):
            continue
        url = get_url(parts)
        items.append(PyRSS2Gen.RSSItem(
            title=parts['title'],
            link=url,
            description=parts['html_body'],
            guid=PyRSS2Gen.Guid(url),
            # PyRSS2Gen writes datetimes with a "GMT" label, so the value
            # has to be UTC (as lastBuildDate below already is).
            # fromtimestamp() would yield local time and skew the
            # published date by the machine's UTC offset.
            pubDate=datetime.utcfromtimestamp(parts['created'])))

    return PyRSS2Gen.RSS2(
        title=title,
        link=base_url,
        description=description,
        lastBuildDate=datetime.utcnow(),
        items=items)
   
def get_data(entry):
    """Run the rst string *entry* through docutils' "html" writer.

    Returns the parts dict produced by publish_parts.
    """
    parts = publish_parts(source=entry, writer_name="html")
    return parts

def get_entries(entries_path):
    """List the ``.rst`` file names in *entries_path*, in descending order.

    Only names are returned, not full paths; ordering is plain reverse
    string sorting.
    """
    rst_names = (name for name in os.listdir(entries_path)
                 if name.endswith(".rst"))
    return sorted(rst_names, reverse=True)

def get_data_entries(entries, entries_path, use_filename):
    """Parse every entry file and attach its creation/modification times.

    entries      -- file names relative to *entries_path*
    entries_path -- folder holding the entry files
    use_filename -- when true, dates come from the file name; otherwise
                    they are looked up via get_entry_dates

    Returns a list of docutils parts dicts, each extended with the keys
    'created' and 'modified'.
    """
    documents = []
    for filename in entries:
        with open(os.path.join(entries_path, filename)) as handle:
            # Entry files are decoded as UTF-8 before parsing.
            document = get_data(handle.read().decode("utf-8"))
            if use_filename:
                timeinfo = get_filename_entry_dates(filename)
            else:
                timeinfo = get_entry_dates(filename, entries_path)
            document.update(timeinfo)
            documents.append(document)
    return documents

def identifier(parts):
    """Create an identifier from the doc title.

    Every character of parts["title"] that is not a word character is
    replaced with a dash (-), one dash per character, and leading and
    trailing dashes are then stripped. Runs of dashes inside the title are
    not collapsed.
    """
    # Raw string: in a plain literal "\w" is an invalid string escape that
    # newer Python versions warn about. The pattern itself is unchanged.
    return re.sub(r"[^\w]", "-", parts["title"]).strip("-")
    
def setup_target_folder(html_path, css_path, css_name):
    """Create the target folder if needed and copy the CSS files into it.

    html_path -- output folder; missing parent folders are created too
    css_path  -- folder containing the stylesheets
    css_name  -- site stylesheet to copy; "html4css1.css" is always copied
                 alongside it
    """
    if not os.path.exists(html_path):
        # makedirs instead of mkdir: a nested target such as "build/html"
        # would otherwise fail when "build" does not exist yet.
        os.makedirs(html_path)
    for stylesheet in (css_name, "html4css1.css"):
        shutil.copy(os.path.join(css_path, stylesheet), html_path)

def hgdate_to_utc(hgdate_string):
    """Convert a hgdate format string to unix time UTC.

    Mercurial's ``hgdate`` filter prints two numbers,
    "<unix timestamp> <timezone offset>". The first is already seconds
    since the epoch in UTC. The second is the committer's timezone offset
    and is only needed to render local time, so it must not be added to
    the timestamp.

    Raises ValueError when the string is empty or does not start with an
    integer.
    """
    fields = hgdate_string.split()
    if not fields:
        raise ValueError("empty hgdate string")
    return int(fields[0])

def get_entry_dates(entry, entries_path):
    """Look up an entry's creation and modification time in the hg log.

    Returns ``{'created': ..., 'modified': ...}`` as unix times. The last
    line of the log output is taken as the creation and the first line as
    the latest modification. An entry with no log output is reported with
    a warning and gets the current time for both values.
    """
    repo = hgapi.Repo(entries_path)
    log_output = repo.hg_log(template='{date|hgdate}\n', filespec=entry)
    stamps = log_output.split("\n")

    # No output at all: the file is not in the repo yet.
    if stamps == ['']:
        print("Warning: %s is not version controlled" % (entry,))
        now = int(time())
        return {'created': now, 'modified': now}

    # The template ends every line with "\n", which leaves one empty
    # trailing element after splitting.
    if not stamps[-1]:
        stamps.pop()
    first_commit = hgdate_to_utc(stamps[-1])
    latest_commit = hgdate_to_utc(stamps[0])
    return {'created': first_commit, 'modified': latest_commit}

def get_filename_entry_dates(entry):
    """Get creation and last modification from the file name, as unix time.

    *entry* must start with ``<year>-<month>-<day>-``; whatever follows the
    third dash is ignored. Both 'created' and 'modified' are set to
    midnight of that date as interpreted by mktime (local time).

    Raises ValueError when the name does not start with such a date.
    """
    # Split at most three times so that dashes in the rest of the file name
    # (e.g. "2012-01-05-my-first-post.rst") do not break the unpacking.
    year, month, day = entry.split('-', 3)[:3]
    t = int(mktime(datetime(int(year), int(month), int(day)).timetuple()))
    return {'created': t, 'modified': t}
    
def get_timestamps(created, modified, dt_format):
    """Format an entry's timestamps for display.

    created, modified -- unix times
    dt_format         -- strftime format string

    Returns ``(created_string, modified_string)``. The second element is
    None unless the modification is more than 10 minutes after creation.
    """
    born = datetime.fromtimestamp(created)
    touched = datetime.fromtimestamp(modified)

    # Edits within 10 minutes of creation are not reported.
    if touched - born > timedelta(minutes=10):
        modified_text = touched.strftime(dt_format)
    else:
        modified_text = None
    return born.strftime(dt_format), modified_text


def get_custom(path, name):
    """Return the contents of the custom snippet ``<path>/<name>.html``.

    The file is decoded as UTF-8, matching how the configuration and the
    entries are read. Returns an empty string when the file does not
    exist.
    """
    source = os.path.join(path, "%s.html" % (name,))

    if not os.path.exists(source):
        return ''
    # The context manager closes the handle; the original left that to the
    # garbage collector.
    with codecs.open(source, 'r', 'utf-8') as f:
        return f.read()

class Article(object):
    """Plain attribute holder for one rendered blog entry.

    The constructor only sets the two comment flags; any other attributes
    are assigned by the code that builds the article.
    """

    def __init__(self, full=True):
        # long_comment stores the argument as given; short_comment is its
        # boolean negation.
        self.long_comment = full
        self.short_comment = not self.long_comment


def generate_blaag(config_path, test_mode=False):
    """Generate html from blaag entries.

    Reads the configuration at *config_path*, parses every entry and
    renders the ``page_template.xhtml`` template into the configured html
    folder: a front page (``index.html``), one ``<identifier>/index.html``
    per entry, and one RSS file per key in FEEDS.

    config_path -- path to the .cfg file
    test_mode   -- when true, absolute URLs are built from the
                   ``test_base_url`` option instead of ``base_url``
    """
    config = get_configuration(config_path)

    loader = TemplateLoader(
        os.path.join(os.path.dirname(__file__), 'templates'),
        auto_reload=True
        )

    html_path = config.get('html', 'html_path')

    setup_target_folder(
        html_path=html_path,
        css_path=config.get('html', 'css_path'),
        css_name=config.get('html', 'css_name')
        )

    entries_path = config.get('content', 'entries_path')
    entries = get_entries(entries_path)

    use_filename_for_creation_time = config.getboolean(
        'datetime',
        'use_filename_for_creation_time')

    data = get_data_entries(
        entries, entries_path,
        use_filename_for_creation_time)

    if not test_mode:
        base_url = config.get('html', 'base_url')
    else:
        base_url = config.get('html', 'test_base_url')

    def get_absolute_url(parts):
        """Create absolute url for a document"""
        return base_url + identifier(parts)

    # Turn the numeric timestamps into display strings and cache the
    # identifier on each parts dict.
    datetime_format = config.get('datetime', 'datetime_format')
    for parts in data:
        created, modified = get_timestamps(parts['created'],
                                           parts['modified'],
                                           datetime_format)
        parts.update({
                'created_str': created,
                'modified_str': modified,
                'identifier': identifier(parts)
                })

    # Sort by creation date, newest first. key/reverse gives the same
    # (stable) order as the former cmp function without relying on the
    # Python-2-only ``cmp`` argument.
    data.sort(key=lambda p: p['created'], reverse=True)

    title = config.get('content', 'title')

    disqus = config.get('disqus', 'name')
    google_analytics = config.get('google_analytics', 'account')

    custom_path = config.get('content', 'custom_path')
    footer = get_custom(custom_path, 'footer')
    blurbs = get_custom(custom_path, 'blurbs')

    # Template variables shared by every rendered page.
    defaults = {
        'disqus': disqus,
        # The timestamp query string forces browsers to reload the css.
        'css': config.get('html', 'css_url') + "?" + str(int(time())),
        'page_title': title,
        'blaag_title': title,
        'author': config.get('content', 'author'),
        'google_analytics': google_analytics,
        'footer': Markup(footer),
        'blurbs': Markup(blurbs)
        }

    articles = []
    for parts in data:
        article_id = parts['identifier']
        url = get_absolute_url(parts)

        article = Article(full=False)
        article.article_id = article_id
        article.body = Markup(parts['html_body'])
        article.timestamp = parts['created_str']
        article.modified = parts['modified_str']
        # Historical misspelling of "modified"; still set so that templates
        # referring to the old attribute name keep working.
        article.modidied = parts['modified_str']
        article.comment_thread_link = url
        article.article_url = url
        article.article_title = parts['title']
        article.path = os.path.join(html_path, article_id)
        articles.append(article)

    template = loader.load("page_template.xhtml")

    # Write main page
    stream = template.generate(articles=articles, all_articles=articles,
                               short_comments=True,
                               **defaults)
    content = stream.render('html', doctype='html')
    with open(os.path.join(html_path, "index.html"), "w") as target:
        target.write(content)

    # Write one page per article, each in its own folder
    for article in articles:
        if not os.path.exists(article.path):
            os.mkdir(article.path)

        stream = template.generate(articles=[article], all_articles=articles,
                                   short_comments=False,
                                   **defaults)
        page = stream.render("html", doctype="html")
        with open(os.path.join(article.path, "index.html"), "w") as target:
            target.write(page)

    # Write rss
    description = config.get('content', 'description')

    for name in FEEDS:
        rss = generate_rss(data, FEEDS[name], title, description, base_url,
                           get_absolute_url)
        # The context manager closes the file even if serialisation fails.
        with open(os.path.join(html_path, name), 'w') as f:
            rss.write_xml(f)
        
def pretxncommit_hook(*args, **kwargs):
    """Regenerate the blog from 'blaag.cfg'; all arguments are ignored.

    NOTE(review): presumably meant to be registered as a Mercurial
    pretxncommit hook, so it runs before a commit is finalised - confirm
    against the repository's hgrc.
    """
    generate_blaag(config_path='blaag.cfg')

def blaag_script():
    """Command-line entry point: parse the options and generate the blog.

    Options:
      -t / --test    build with the test base url (off unless given)
      -c / --config  configuration file to use (defaults to blaag.cfg)
    """
    cli = argparse.ArgumentParser(description="generate blog html")
    # store_true already defaults to False when the flag is absent.
    cli.add_argument(
        '-t', '--test',
        action="store_true",
        help="generate html for local testing")
    cli.add_argument(
        '-c', '--config',
        default='blaag.cfg',
        help="custom .cfg file (defaults to blaag.cfg)")
    options = cli.parse_args()

    generate_blaag(options.config, test_mode=options.test)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    blaag_script()