Source

django-easyurls / easyurls.py

Full commit
#!/usr/bin/env python
#
# Copyright (C) 2009 by Ollie Rutherfurd <oliver@rutherfurd.net>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
# * Redistributions of source code must retain the above copyright notice, 
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products 
#   derived from this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. 
#
r"""
Making it easier to read and write Django URL patterns.

By making assumptions, like ``year`` is usually a 4-digit number and
``id`` 1 or more digits, `django-easyurls` takes much of the repetition
out of defining URLs, using a syntax that's shorter and easier to read.

Compare the following:

.. sourcecode:: python

    # standard
    urlpatterns += patterns('django.views.generic.date_based',
        url(r'^(?P<year>\d{4})/(?P<month>[a-z]{3})/(?P<day>\w{1,2})/(?P<slug>[-\w]+)/$',
            'object_detail', info_dict),
        url(r'^(?P<year>\d{4})/(?P<month>[a-z]{3})/(?P<day>\d{1,2})/$',
            'archive_day',   info_dict),
        url(r'^(?P<year>\d{4})/(?P<month>[a-z]{3})/$', 'archive_month', info_dict),
        url(r'^(?P<year>\d{4})/$', 'archive_year',  info_dict),
    )
    
    # use easyurls, and let it generate the regex for you
    from easyurls import regex as p
    urlpatterns += patterns('django.views.generic.date_based',
        url(p('<year>/<month:mon>/<day>/<slug>'), 'object_detail', info_dict),
        url(p('<year>/<month:mon>/<day>'),        'archive_day',   info_dict),
        url(p('<year>/<month:mon>'),              'archive_month', info_dict),
        url(p('<year>'),                          'archive_year',  info_dict),
    )

These two sets of URL patterns are functionally equivalent -- the same
regex is passed to ``url()`` in both cases, but the second is shorter
and clearer.  Why keep repeating that year is a 4-digit number, month is
3 letters, and day a 1 or 2 digit number?  Also, since 99% of the
time one wants the pattern to start with '^', and end with '/$', why
keep repeating it?  If you don't want them, you can say so.

django-easyurls works by defining names for patterns and generating
regular expressions for you.  By default, the name of the captured
variable is the name of the pattern.  This can be overridden, as is done
above where the "mon" pattern is used for "month", instead of the
default ``\d{1,2}``.

Here's a list of the default patterns:

.. sourcecode:: pycon

    >>> from easyurls import regex as p
    >>> for name in sorted(p.patterns):
    ...     print '%5s: %s' % (name,p.patterns[name])
      day: \d{1,2}
       id: \d+
      mon: [a-z]{3}
    month: \d{1,2}
        n: \d+
     slug: [\w-]+
      tag: \w+
     year: \d{4}

To use a different name for a pattern, or different pattern for a name,
add the pattern after the name, prefixing the pattern with ":".

.. sourcecode:: pycon

    # default for month is \d{1,2}
    >>> print p('<month>')
    ^(?P<month>\d{1,2})/$

    # using [a-z]{3} for month
    >>> print p('<month:mon>')
    ^(?P<month>[a-z]{3})/$

    # using [a-z]{3} for mmm
    >>> print p('<mmm:mon>')
    ^(?P<mmm>[a-z]{3})/$

It's easy to add new or override existing patterns:

.. sourcecode:: pycon

    >>> p['yy'] = r'\d{2}'
    >>> p['mm'] = r'\d{2}'
    >>> p['dd'] = r'\d{2}'

    >>> print p('<year:yy>/<month:mm>/<day:dd>')
    ^(?P<year>\d{2})/(?P<month>\d{2})/(?P<day>\d{2})/$

By default, if no pattern is found, ``\d+`` is assumed.

.. sourcecode:: pycon

    >>> print p('releases/<project_id>')
    ^releases/(?P<project_id>\d+)/$

For flexibility, you can always use a regular expression.

.. sourcecode:: pycon

    # regex for unknown "zip_code"
    >>> print p('zip/<zip_code:\d{5}>')
    ^zip/(?P<zip_code>\d{5})/$

    # override slug, allowing "."
    >>> print p('<slug:[\w-.]+>')
    ^(?P<slug>[\w-.]+)/$

For demonstration and testing purposes, here's how prepending of '^'
and appending of '/' and '$' are handled:

.. sourcecode:: pycon

    >>> print p('')
    ^$
    >>> print p('foo$')
    ^foo$
    >>> print p('foo/')
    ^foo/$
    >>> print p('/')
    ^/$

Prepending of '^' and appending of '/' and '$' can be disabled.

.. sourcecode:: pycon

    >>> p('foo', anchor=False, terminate=False, append_slash=False)
    'foo'
"""
__version__ = '0.1'

import functools,re

# Built-in pattern table: pattern name -> regular-expression fragment.
PATTERNS = {
    # Names that are also the usual names of URL variables, so that
    # <year>, <slug>, ... work without spelling out a pattern.
    'day': r'\d{1,2}',
    'id': r'\d+',
    'month': r'\d{1,2}',
    'slug': r'[\w-]+',
    'tag': r'\w+',
    'year': r'\d{4}',
    # Names intended for the pattern part of a placeholder rather than
    # as variable names, e.g. <month:mon>.
    'mon': r'[a-z]{3}',  # three letters: jan, feb, etc...
    'n': r'\d+',  # n = number
}

# Matches a single placeholder of the form <name[:pattern]>.
VARIABLE = re.compile(
    r'<(?P<name>\w+)'             # opening '<' and the variable name
    r'(?::?(?P<pattern>[^>]+))?'  # optional ':' then everything up to '>'
    r'>'                          # closing '>'
)


class URLPatternGenerator(object):
    """Turn URL templates with ``<name[:pattern]>`` placeholders into regexes.

    Instances are callable: ``generator(url)`` returns the regular
    expression string built from ``url``.  Named patterns are kept in
    ``self.patterns`` and can be registered with ``add()`` or with item
    assignment (``generator[name] = pattern``).
    """

    def __init__(self, patterns=None, default=r'\d+',
                 append_slash=True, anchor=True, terminate=True):
        """Set up the pattern table and the per-instance defaults.

        patterns     -- dict mapping a pattern name to a regex fragment.
                        When None, a private copy of the module-level
                        PATTERNS table is used.  A dict that is passed
                        in -- including an empty one -- is used as-is
                        and is the dict that ``add()`` updates.
        default      -- regex fragment for a variable that has neither
                        an explicit pattern nor an entry in ``patterns``.
        append_slash -- default for appending a trailing '/'.
        anchor       -- default for prepending '^'.
        terminate    -- default for appending '$'.
        """
        # Test for None instead of truthiness: an empty dict is a valid
        # "no named patterns" table and must not be swapped for the
        # defaults (which would also detach it from the caller's dict).
        if patterns is None:
            patterns = dict(PATTERNS)
        self.patterns = patterns
        self.default = default              # default pattern
        self.append_slash = append_slash    # trailing /
        self.anchor = anchor                # prepend ^
        self.terminate = terminate          # append $

    def add(self, name, pattern):
        """Register ``pattern`` under ``name``, replacing any existing entry."""
        self.patterns[name] = pattern
    __setitem__ = add

    def replace(self, match, url):
        """Return the named-group regex for one placeholder match.

        match -- match object exposing the 'name' and 'pattern' groups.
        url   -- the template being processed.  Not used here; it is
                 supplied by ``__call__`` on every invocation.
        """
        name = match.group('name')
        pattern = match.group('pattern')
        if pattern:
            # The pattern part is either the name of a registered
            # pattern or a literal regex, which is used unchanged.
            regexp = self.patterns.get(pattern, pattern)
        else:
            # No pattern part: look the variable name up, falling back
            # to the default pattern.
            regexp = self.patterns.get(name, self.default)
        return '(?P<%s>%s)' % (name, regexp)

    def __call__(self, url, **kw):
        """Return the regex string for the template ``url``.

        The keyword arguments ``anchor``, ``append_slash`` and
        ``terminate`` override the instance defaults for this call
        only; other keyword arguments are ignored.
        """
        anchor = kw.get('anchor', self.anchor)
        append_slash = kw.get('append_slash', self.append_slash)
        terminate = kw.get('terminate', self.terminate)

        r = VARIABLE.sub(functools.partial(self.replace, url=url), url)
        if anchor and not r.startswith('^'):
            r = '^' + r
        # special-case so '^$' doesn't end up with a '/' in it
        if url and append_slash and not r.endswith(('$', '/')):
            r += '/'
        if terminate and not r.endswith('$'):
            r += '$'
        return r


# Ready-made module-level generator, so callers can simply write
# ``from easyurls import regex`` instead of instantiating the class.
regex = URLPatternGenerator()


if __name__ == '__main__':
    # Executed as a script: run the examples in the docstrings as doctests.
    import doctest
    doctest.testmod()