Author: David Chambers
Project: Mango


Mango — models.py

# -*- coding: utf-8 -*-

import datetime
import os
import re

import markdown
import pytz

from django.conf import settings
from django.core.cache import cache
from django.db import models
from django.template import Context, loader

from mango.settings import *

# Building blocks for Mango's pipe-prefixed markup on top of Markdown:
# `block` matches a run of consecutive lines each starting with the given
# prefix; `match` strips one such prefix (plus a single optional space).
block = r'(?m)^(%s(?=[ \n])[^\n]*(\n|$))+'
match = r'(?m)^%s(?=[ \n]) ?'

# Pre-compiled patterns shared throughout this module.
RE = {
    '\r\n?': re.compile(r'\r\n?'),  # normalize Windows/old-Mac line endings
    'alias=>canon': re.compile(r'^(0*(?P<alias>.*?)=>)?(?P<canon>.+)$'),
    'excerpt': re.compile(block % r'\|'),
    'excerpt_pipes': re.compile(match % r'\|'),
    'filesize': re.compile(r'''{{\s*filesize:(['"])(?P<filepath>\S+)\1\s*}}'''),
    'fragment': re.compile(r'(?s)(<code>.*?</code>|<pre>.*?</pre>|<skip>.*?</skip>)'),
    'hand-crafted': re.compile(r'( {,3}\S+:.*(\n[ \t]*\S+:.*)*\n{2,})?(?P<excerpt>(\|(?=[ \n])[^\n]*\n)+)'),
    'heading': re.compile(r'(?m)\s*<(h[1-6])[^>]*>(?P<title>.+?)</\1>$(?P<html>[\s\S]*)'),
    'replacements': (
        # ... -> ellipsis
        (re.compile(r'(?<![.])[.]{3}(?![.])'), u'\u2026'),
        # [space][hyphen][hyphen][space] -> [thin space][em dash][thin space]
        (re.compile(r' -- '), u'\u2009\u2014\u2009'),
    ),
    'skip': re.compile(r'</?skip>'),
    'snippet': re.compile(r'(?s)^<(code|pre|skip)>.*?</\1>$'),
    'update': re.compile(block % r'\|\|'),
    'update_pipes': re.compile(match % r'\|\|'),
}

# Shared Markdown converter — the `meta` extension exposes each document's
# metadata via `md.Meta` after conversion — and the template used to render
# inline "updates".
md = markdown.Markdown(extensions=('meta',) + MARKDOWN_EXTENSIONS)
update_template = loader.get_template('update.dhtml')

class Resource(object):
    """Base class deriving canonical/short URLs and alias paths from a
    filesystem path whose components may take the form "alias=>canon"."""

    def __init__(self, path):
        super(Resource, self).__init__()
        canon_fragments, short_fragments = [], []
        head, tail = os.path.split(path)
        while tail:
            match = re.match(RE['alias=>canon'], tail)
            if match:
                canon = match.group('canon')
                # Only the last component of a file path carries an extension.
                if not canon_fragments and not os.path.isdir(path):
                    canon = os.path.splitext(canon)[0] # strip extension
                canon_fragments.insert(0, canon)
                short_fragments.insert(0, match.group('alias') or canon)
            head, tail = os.path.split(head)
        replacement = (UNIX_DOCUMENTS_PATH, u'', 1)
        canon_path = (u'/%s/' % '/'.join(canon_fragments)).replace(*replacement).lower()
        short_path = (u'/%s/' % '/'.join(short_fragments)).replace(*replacement).lower()
        self.urls = {
            'canon': {'rel': canon_path, 'abs': BASE_URL + canon_path},
            'short': {'rel': short_path, 'abs': SHORT_URL_BASE + short_path},
        }

        # Build every alias path reachable by mixing canonical and short
        # fragments at each level of the path.
        aliases = [u'']
        fragments = zip(canon_path.split(u'/')[1:], short_path.split(u'/')[1:])
        for canon_fragment, short_fragment in fragments:
            # append the canonical fragment to each alias
            updated = [u'/'.join([alias, canon_fragment]) for alias in aliases]
            if short_fragment != canon_fragment:
                # branch each alias and append the short fragment to the new branches
                updated += [u'/'.join([alias, short_fragment]) for alias in aliases]
            aliases = updated
        self._aliases = aliases

class Document(Resource):
    """A single Markdown document (a page or a post)."""

    def __init__(self, filepath=None):
        # `filepath` is optional so bare instances can be created solely to
        # render inline "updates" (see `convert`).
        if filepath:
            super(Document, self).__init__(path=filepath)
            self._filepath = filepath
            f = open(filepath)
            try:
                contents = unicode(f.read(), 'utf-8')
            finally:
                f.close() # don't leak the file handle on a decode error
            self.convert(contents)

    def convert(self, contents):
        """Convert raw Markdown `contents`, populating `body`, `excerpt`,
        `html`, `meta`, `datetime`, `title`, and `type`. Returns `self`."""
        # Normalize line endings and guarantee a trailing newline.
        self.body = contents = re.sub(RE['\r\n?'], '\n', contents) + '\n'

        # excerpts: pipe-prefixed blocks form the document's excerpt
        snippets = []
        match = re.match(RE['hand-crafted'], contents)
        if match:
            # A hand-crafted excerpt at the top of the file is removed from
            # the document body entirely.
            capture = match.group('excerpt')
            snippets.append(re.sub(RE['excerpt_pipes'], u'', capture))
            contents = contents.replace(capture, u'')
        for match in re.finditer(RE['excerpt'], contents):
            # Other excerpt blocks remain in the body, minus their pipes.
            capture = match.group(0)
            snippet = re.sub(RE['excerpt_pipes'], u'', capture)
            contents = contents.replace(capture, snippet)
        self.excerpt = md.convert('\n\n'.join(snippets))

        # updates: double-pipe blocks are rendered via the update template
        for match in re.finditer(RE['update'], contents):
            capture = match.group(0)
            update = Document()
            context = Context({'update': update.convert(
                    re.sub(RE['update_pipes'], u'', capture))})
            contents = contents.replace(capture, update_template.render(context))

        self.html = md.convert(contents)
        self.meta = getattr(md, 'Meta', {})
        for key, value in self.meta.items():
            self.meta[key] = value
            if len(value) == 1: # note: `value` is always a list
                if key in META_LISTS:
                    # e.g. "tags: foo, bar" -> ['foo', 'bar']
                    self.meta[key] = value[0].split(', ')
                else:
                    # unwrap single-item lists for scalar metadata
                    self.meta[key] = value[0]

        # Combine `date` and `time` metadata into an aware UTC datetime.
        self.datetime = None
        if 'date' in self.meta and 'time' in self.meta:
            tz = pytz.timezone(settings.TIME_ZONE)
            dt_format = u'%s %s' % (MARKDOWN_DATE_FORMAT, MARKDOWN_TIME_FORMAT)
            try:
                self.datetime = tz.localize(datetime.datetime.strptime('%s %s' % (
                        self.meta['date'], self.meta['time']), dt_format)).astimezone(pytz.utc)
            except ValueError: # date and/or time incorrectly formatted
                pass

        # Fall back to the document's first heading for its title, removing
        # that heading from the rendered HTML.
        self.title = self.meta.get('title', u'')
        if not self.title:
            match = re.match(RE['heading'], self.html)
            if match:
                self.title = match.group('title')
                self.html = match.group('html')

        def filesize(filepath):
            """Return a human-readable size for `filepath`, or u'' on error."""
            if not os.path.isabs(filepath):
                filepath = os.path.join(PROJECT_PATH, filepath)
            try:
                filesize = os.path.getsize(filepath)
            except OSError:
                return u'' # fail silently

            bytes = (
                ('bytes', 1),
                ('kB', KILOBYTE_SIZE**1),
                ('MB', KILOBYTE_SIZE**2),
                ('GB', KILOBYTE_SIZE**3),
                ('TB', KILOBYTE_SIZE**4),
            )
            for unit, value in bytes:
                # choose the largest unit that keeps the number below
                # KILOBYTE_SIZE; TB is the final fallback
                if filesize <= value * KILOBYTE_SIZE or unit == 'TB':
                    if unit == 'bytes':
                        return u'(%s\u2009bytes)' % filesize
                    else:
                        return u'(≈%.1f\u2009%s)' % (float(filesize)/value, unit)

        # Apply filesize substitutions and typographic replacements to
        # everything except <code>, <pre>, and <skip> fragments.
        fragments = re.split(RE['fragment'], self.html)
        self.html = u''
        for fragment in fragments:
            if not re.match(RE['snippet'], fragment):
                fragment = re.sub(RE['filesize'],
                        lambda match: u'<span class="filesize">%s</span>' % (
                        filesize(match.group('filepath'))), fragment)
                if REPLACEMENTS:
                    for pattern, replacement in RE['replacements']:
                        fragment = re.sub(pattern, replacement, fragment)
            self.html += fragment
        # <skip> tags have served their purpose; strip them everywhere.
        self.body = re.sub(RE['skip'], '', self.body)
        self.html = re.sub(RE['skip'], '', self.html)
        self.excerpt = self.excerpt or self.html
        # Documents with a datetime default to posts; the rest to pages.
        self.type = self.meta.get('type', 'post' if self.datetime else 'page')

        return self

    def __unicode__(self):
        return self.title

class Category(Resource):
    def toplevel(cls):
        cache_key = 'mango:toplevel:%s' % BASE_URL
        toplevel = cache.get(cache_key)
        if toplevel:
            if settings.DEBUG:
                print 'Document tree retrieved from cache'
            return toplevel
        toplevel = Category(DOCUMENTS_PATH)
        cache.set(cache_key, toplevel, INDEX_CACHE_SECONDS)
        if settings.DEBUG:
            print 'Document tree created and cached'
        return toplevel

    def __init__(self, dirpath):
        super(Category, self).__init__(path=dirpath)
        self._dirpath = dirpath
        self.name = re.match(RE['alias=>canon'],
        self.pages = []
        self.posts = []
        self.subcategories = []

        for name in [f for f in os.listdir(dirpath) if not f.startswith('.')]:
            path = os.path.join(dirpath, name)
            if os.path.isdir(path):
                category = Category(path)

        self.subcategories.sort(key=lambda category: category.name)
        self.pages.sort(key=lambda page: page.title)
        self.posts.sort(key=lambda post: post.datetime, reverse=True)

    def add_document(self, filepath):
        cache_key = 'mango:%s' % filepath
        document, mod_time = cache.get(cache_key, (None, None)) # retrieve Document
        if document and mod_time == os.path.getmtime(filepath):
            if settings.DEBUG:
                print 'Document object retrieved from cache: %s' % filepath
        else: # modified or not in cache, so create and cache a new Document object
            document = Document(filepath)
            cache.set(cache_key, (document, os.path.getmtime(filepath)), POST_CACHE_SECONDS)
            if settings.DEBUG:
                print 'Document object created and cached: %s' % filepath

        if document.type == 'page':
        return self

    def descendants(self, pages=False):
        documents = self.posts[:]
        if pages:
            documents += self.pages[:]
        for subcategory in self.subcategories:
            documents += subcategory.descendants(pages)
        return documents

    def archives(self):
        archives = []
        posts = self.descendants()
        if posts:
            posts.sort(key=lambda post: post.datetime, reverse=True)
            year = posts[0].datetime.year
            month = posts[0].datetime.month
            these_posts = []
            for post in posts:
                if post.datetime.year == year and post.datetime.month == month:
                    archives.append((year, month, these_posts))
                    year, month = post.datetime.year, post.datetime.month
                    these_posts = [post]
            archives.append((year, month, these_posts))
        return archives

    def find_match(self, urlpath):
        if urlpath in self._aliases:
            return self
        for document in self.pages + self.posts:
            if urlpath in document._aliases:
                return document
        for subcategory in self.subcategories:
            match = subcategory.find_match(urlpath)
            if match:
                return match
        return None

    def _tags(self):
        tags = []
        for document in self.pages + self.posts:
            tags += [tag for tag in document.meta.get('tags', [])]
        for subcategory in self.subcategories:
            tags += subcategory._tags()
        return tags

    def tags(self):
        tags = {}
        for tag in self._tags():
            tags[tag] = tags.get(tag, 0) + 1
        return [(key, value) for key, value in sorted(tags.items(),
                key=lambda pair: pair[0].lower())]

    def __unicode__(self):
        return self.name

if SUBSCRIPTIONS: # since model requires a database, define it only if required
    # Email-subscription record; sites with SUBSCRIPTIONS disabled need no
    # database at all, hence the conditional definition.
    class Subscription(models.Model):
        subscriber_name = models.CharField(max_length=100)
        subscriber_email = models.EmailField(max_length=100)
        # URL of the resource the subscriber is following
        url = models.URLField(max_length=100)