Source

Mango / models.py

Full commit
#!/usr/bin/python
# -*- coding: utf-8 -*-

import datetime
import os
import re

import markdown
import pytz

from django.conf import settings
from django.template import Context, loader

from mango.settings import *

# Pattern templates; ``%s`` is filled in with an escaped line marker.
# ``block`` spans a run of consecutive marked lines, while ``match`` covers
# only the marker (and one optional space) at the start of each such line.
block = r'(?m)^(%s(?=[ \n])[^\n]*(\n|$))+'
match = r'(?m)^%s(?=[ \n]) ?'

# Compiled patterns, looked up by a descriptive key.
RE = {}

# A carriage return, optionally followed by a line feed.
RE['\r\n?'] = re.compile(r'\r\n?')

# A path component of the form "alias=>canon"; the alias part (with any
# leading zeros dropped) is optional.
RE['alias=>canon'] = re.compile(r'^(0*(?P<alias>.*?)=>)?(?P<canon>.+)$')

# Runs of lines marked with a single pipe, and the marker itself.
RE['excerpt'] = re.compile(block % r'\|')
RE['excerpt_pipes'] = re.compile(match % r'\|')

# A {{ filesize:"path" }} placeholder (either quote style).
RE['filesize'] = re.compile(r'''{{\s*filesize:(['"])(?P<filepath>\S+)\1\s*}}''')

# Splits text around <code>, <pre> and <skip> elements, keeping them.
RE['fragment'] = re.compile(r'(?s)(<code>.*?</code>|<pre>.*?</pre>|<skip>.*?</skip>)')

# "key: value" lines at the top of a document, a blank line, then a run of
# single-pipe lines captured as ``excerpt``.
RE['hand-crafted'] = re.compile(r' {,3}\S+:.*(\n[ \t]*\S+:.*)*\n{2,}(?P<excerpt>(\|(?=[ \n])[^\n]*\n)+)')

# A leading h1-h6 element (``title``) and everything after it (``html``).
RE['heading'] = re.compile(r'(?m)\s*<(h[1-6])[^>]*>(?P<title>.+?)</\1>$(?P<html>[\s\S]*)')

# (pattern, replacement) pairs for typographic substitutions.
RE['replacements'] = (
    # exactly three dots -> ellipsis
    (re.compile(r'(?<![.])[.]{3}(?![.])'), u'\u2026'),
    # [space][hyphen][hyphen][space] -> [thin space][em dash][thin space]
    (re.compile(r' -- '), u'\u2009\u2014\u2009'),
)

# Opening or closing <skip> tag.
RE['skip'] = re.compile(r'</?skip>')

# A fragment that consists entirely of one <code>, <pre> or <skip> element.
RE['snippet'] = re.compile(r'(?s)^<(code|pre|skip)>.*?</\1>$')

# Runs of lines marked with a double pipe, and the marker itself.
RE['update'] = re.compile(block % r'\|\|')
RE['update_pipes'] = re.compile(match % r'\|\|')

# One Markdown parser shared by every conversion in this module. The 'meta'
# extension is always enabled; Document.convert reads ``md.Meta`` after
# converting. MARKDOWN_EXTENSIONS is coerced with list() so the setting may
# be written as either a list or a tuple (tuple + list would raise TypeError).
md = markdown.Markdown(extensions=['meta'] + list(MARKDOWN_EXTENSIONS))

# Template used to render "update" blocks; loaded once at import time.
update_template = loader.get_template('update.dhtml')

class Document:
    """A piece of Markdown source together with the values derived from it.

    An instance carries the raw ``body`` plus its URLs, type, title,
    datetime, excerpt and HTML. ``convert()`` derives most of these from the
    body; ``set_urls()`` derives the URLs from a file path.
    """

    def __init__(self,
                 body,
                 urls=None,
                 kind=None,
                 title=None,
                 utc_datetime=None,
                 excerpt=None,
                 html=None):
        """Store the raw body and any values that are already known.

        Two parameters are stored under different attribute names:
        ``kind`` becomes ``self.type`` and ``utc_datetime`` becomes
        ``self.datetime``.
        """
        self.body = body
        self.urls = urls
        self.type = kind
        self.title = title
        self.datetime = utc_datetime
        self.excerpt = excerpt
        self.html = html
        # Filled in by convert(); defined here so the attribute always exists.
        self.meta = {}

    def convert(self):
        """Render the body and populate the derived attributes.

        Steps, in order:

        1. Normalise line endings to ``\\n`` and append a trailing newline.
        2. Pull excerpt lines (single-pipe prefix) out of the body and
           render them as ``self.excerpt``.
        3. Replace each update block (double-pipe prefix) with the rendered
           ``update.dhtml`` template, converting the block as a nested
           Document.
        4. Render what remains as ``self.html`` and read the metadata the
           parser collected into ``self.meta``.
        5. Build ``self.datetime`` (UTC) from the ``date`` and ``time``
           metadata, if both are present and parse correctly.
        6. Take the title from the metadata, or failing that from a heading
           at the start of the HTML (which is then dropped from the HTML).
        7. Outside <code>, <pre> and <skip> elements, expand filesize
           placeholders and apply the typographic replacements.
        8. Strip <skip> tags, fall back to the full HTML when there is no
           excerpt, and settle ``self.type``.

        Overwrites ``excerpt``, ``html``, ``title`` and ``type``, including
        any values passed to the constructor. Returns ``self``.
        """
        self.body = body = re.sub(RE['\r\n?'], '\n', self.body) + '\n'

        # excerpts
        snippets = []
        match = re.match(RE['hand-crafted'], body)
        if match:
            capture = match.group('excerpt')
            snippets.append(re.sub(RE['excerpt_pipes'], u'', capture))
            body = body.replace(capture, u'')
        # finditer() scans the string as it was when the loop started, so
        # rebinding `body` inside the loop does not disturb the iteration.
        for match in re.finditer(RE['excerpt'], body):
            capture = match.group(0)
            snippets.append(re.sub(RE['excerpt_pipes'], u'', capture))
            body = body.replace(capture, u'')
        # The parser is shared module-wide; reset it before every conversion
        # so state kept by extensions cannot leak in from earlier input.
        md.reset()
        self.excerpt = md.convert('\n\n'.join(snippets))

        # updates
        for match in re.finditer(RE['update'], body):
            capture = match.group(0)
            update = Document(body=re.sub(RE['update_pipes'], u'', capture))
            context = Context({'update': update.convert()})
            body = body.replace(capture, update_template.render(context))

        md.reset()
        self.html = md.convert(body)

        # Work on a copy so that normalising the values below does not
        # mutate the parser's own dict.
        self.meta = dict(getattr(md, 'Meta', {}))
        for key, value in list(self.meta.items()):
            if len(value) == 1: # note: `value` is always a list
                if key in META_LISTS:
                    self.meta[key] = value[0].split(', ')
                else:
                    self.meta[key] = value[0]

        if 'date' in self.meta and 'time' in self.meta:
            tz = pytz.timezone(settings.TIME_ZONE)
            dt_format = u'%s %s' % (MARKDOWN_DATE_FORMAT, MARKDOWN_TIME_FORMAT)
            stamp = '%s %s' % (self.meta['date'], self.meta['time'])
            try:
                # Interpret the timestamp in the site's time zone, then
                # convert it to UTC.
                self.datetime = tz.localize(
                    datetime.datetime.strptime(stamp, dt_format)
                ).astimezone(pytz.utc)
            except ValueError: # date and/or time incorrectly formatted
                pass

        self.title = self.meta.get('title', u'')
        if not self.title:
            match = re.match(RE['heading'], self.html)
            if match:
                self.title = match.group('title')
                self.html = match.group('html')

        def filesize(filepath):
            """Return a parenthesised size for `filepath`, or u'' on error.

            Relative paths are resolved against PROJECT_PATH.
            """
            if not os.path.isabs(filepath):
                filepath = os.path.join(PROJECT_PATH, filepath)
            try:
                size = os.path.getsize(filepath)
            except OSError:
                return u'' # fail silently

            units = (
                ('bytes', 1),
                ('kB', KILOBYTE_SIZE**1),
                ('MB', KILOBYTE_SIZE**2),
                ('GB', KILOBYTE_SIZE**3),
                ('TB', KILOBYTE_SIZE**4),
            )
            # Pick the first unit whose next step up would exceed the size;
            # 'TB' is the catch-all for anything larger.
            # NOTE(review): `<=` means a size exactly equal to a boundary is
            # reported in the smaller unit -- confirm this is intended.
            for unit, value in units:
                if size <= value * KILOBYTE_SIZE or unit == 'TB':
                    if unit == 'bytes':
                        return u'(%s\u2009bytes)' % size
                    return u'(≈%.1f\u2009%s)' % (float(size)/value, unit)

        def filesize_span(match):
            """Wrap the size of the matched placeholder's file in a span."""
            return u'<span class="filesize">%s</span>' % (
                    filesize(match.group('filepath')))

        # Leave <code>, <pre> and <skip> elements untouched; post-process
        # everything in between.
        pieces = []
        for fragment in re.split(RE['fragment'], self.html):
            if not re.match(RE['snippet'], fragment):
                fragment = re.sub(RE['filesize'], filesize_span, fragment)
                if REPLACEMENTS:
                    for pattern, replacement in RE['replacements']:
                        fragment = re.sub(pattern, replacement, fragment)
            pieces.append(fragment)
        self.html = u''.join(pieces)

        self.body = re.sub(RE['skip'], '', self.body)
        self.html = re.sub(RE['skip'], '', self.html)
        self.excerpt = self.excerpt or self.html
        self.type = self.meta.get('type', 'post' if self.datetime else 'page')

        return self

    def set_urls(self, filepath):
        """Derive canonical and short URLs from `filepath`; return ``self``.

        Each component of the resolved path may have the form
        "alias=>canon". The canonical URL is built from the ``canon`` parts
        and the short URL from the ``alias`` parts, falling back to
        ``canon`` where a component has no alias. The file extension is
        dropped, and the first occurrence of UNIX_PATH_TO_POSTS is removed
        from each joined path. The result is stored in ``self.urls`` as
        ``{'canon': {'rel', 'abs'}, 'short': {'rel', 'abs'}}``.
        """
        # The empty strings at both ends give the joined path a leading and
        # a trailing slash; components are inserted between them.
        canon_fragments = [u'', u'']
        alias_fragments = [u'', u'']

        head, tail = os.path.split(os.path.realpath(filepath))
        match = re.match(RE['alias=>canon'], tail)
        if match:
            canon = os.path.splitext(match.group('canon'))[0] # strip extension
            canon_fragments.insert(1, canon)
            alias_fragments.insert(1, match.group('alias') or canon)

        # Walk up the directory tree; inserting at index 1 keeps the
        # components in root-to-leaf order.
        while tail:
            head, tail = os.path.split(head)
            match = re.match(RE['alias=>canon'], tail)
            if match:
                canon = match.group('canon')
                canon_fragments.insert(1, canon)
                alias_fragments.insert(1, match.group('alias') or canon)

        canon_path = u'/'.join(canon_fragments).replace(UNIX_PATH_TO_POSTS, u'', 1)
        short_path = u'/'.join(alias_fragments).replace(UNIX_PATH_TO_POSTS, u'', 1)

        self.urls = {
            'canon': {'rel': canon_path, 'abs': BASE_URL + canon_path},
            'short': {'rel': short_path, 'abs': SHORT_URL_BASE + short_path},
        }

        return self

    def __unicode__(self):
        """Return the title, or an empty string when no title is set."""
        # Returning None here would make unicode(document) raise TypeError.
        return self.title or u''