Simon Meers avatar Simon Meers committed 46b3922

Initial commit

Comments (0)

Files changed (14)

+syntax: glob
+*.pyc
+*~
+MANIFEST
+The author of django-dbgettext is Simon Meers <simon@simonmeers.com>

Empty file added.

+Copyright (c) 2009, Simon Meers
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of the author nor the names of other
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+include CHANGELOG
+include LICENSE
+include MANIFEST.in
+include README
+include AUTHORS
+Django dbgettext
+================
+
+Extracts translatable strings from Django models for handling via Django's 
+standard i18n mechanisms.

dbgettext/__init__.py

+# based on django.contrib.admin.__init__.py
+
+from django.utils.importlib import import_module
+LOADING = False
+
+def autodiscover():
+    """
+    Auto-discover INSTALLED_APPS dbgettext.py modules and fail silently when
+    not present. This forces an import on them to register any admin bits they
+    may want.
+    """
+    global LOADING
+    if LOADING:
+        return
+    LOADING = True
+
+    import imp
+    from django.conf import settings
+
+    for app in settings.INSTALLED_APPS:
+        try:
+            app_path = import_module(app).__path__
+        except AttributeError:
+            continue
+
+        try:
+            imp.find_module('gettext', app_path)
+        except ImportError:
+            continue
+
+        import_module("%s.gettext" % app)
+
+    LOADING = False
+
+# Run discovery as soon as the dbgettext package itself is imported:
+autodiscover()

dbgettext/html.py

+from django.conf import settings
+from registry import registry
+import re
+
+class Token(object):
+    """ A categorised chunk of HTML content """
+
+    translatable_types = ('text', 'whitespace',)
+
+    def __init__(self, type, raw):
+        self.type = type
+        self.raw = raw
+
+    def is_translatable(self):
+        if self.type in Token.translatable_types:
+            return True
+
+
+class Tag(Token):
+    """ An opening/closing/empty HTML tag """
+
+    gettext_inline_tags = getattr(settings, 'DBGETTEXT_INLINE_TAGS', 
+                                   ('b','i','u','em','strong',))
+
+    def __init__(self, type, raw, name, attributes=None):
+        super(Tag, self).__init__(type, raw)
+        self.name = name
+        self.attributes = attributes
+
+    def is_translatable(self):
+        return self.name in Tag.gettext_inline_tags
+
+
+def html_gettext(obj, attribute, export=False):
+    """ Extracts translatable strings from HTML content
+    
+    Returns original content with ugettext applied to translatable parts.
+
+    If export is True, returns a list of translatable strings only.
+
+    """
+
+    options = registry._registry[type(obj)]
+    content = getattr(obj, attribute)
+
+    from django.utils.translation import ugettext as _
+    # lazy / string_concat don't seem to work how I want...
+
+    def ignore(scanner, token):
+        return Token('ignore', token)
+
+    def open_tag(scanner, token):
+        return Tag('open', token, scanner.match.groups()[0])
+
+    def close_tag(scanner, token):
+        return Tag('close', token, scanner.match.groups()[0])
+
+    def empty_tag(scanner, token):
+        return Tag('empty', token, scanner.match.groups()[0])
+
+    def open_tag_with_attributes(scanner, token):
+        return Tag(*(('open', token,) + scanner.match.groups()[:2]))
+
+    def empty_tag_with_attributes(scanner, token):
+        return Tag(*(('empty', token,) + scanner.match.groups()[:2]))
+
+    def text(scanner, token):
+        return Token('text', token)
+
+    def whitespace(scanner, token):
+        return Token('whitespace', token)
+
+    ignored = [
+        (r'<!--.*?-->', ignore),
+        (r'<script.*?/script>', ignore),
+    ]
+
+    custom = getattr(options, 'custom_lexicon_rules', [])
+
+    tags = [
+        (r'<\s*/\s*([^>]*?)\s*>', close_tag),
+        (r'<\s*([^>]*?)\s*/\s*>', empty_tag),
+        (r'<\s*([a-zA-Z]+)\s+([^\s>][^>]*?)\s*>', 
+         open_tag_with_attributes),
+        (r'<\s*([a-zA-Z]+)\s+([^\s>][^>]*?)\s*/\s*>', 
+         empty_tag_with_attributes),
+        (r'<\s*([^>]*?)\s*>', open_tag),
+    ]
+
+    whitespace = [
+        (r'\s+', whitespace),
+        (r'&nbsp;', whitespace),
+    ]
+
+    text = [
+        (r'[^<>]*[^\s<>]', text),
+    ]
+    
+    lexicon = getattr(options, 'custom_lexicon', 
+                      ignored + custom + tags + whitespace + text)
+
+    scanner = re.Scanner(lexicon, re.DOTALL)
+    tokens, remainder = scanner.scan(content)
+
+    gettext = []
+    output = []
+    current_string = []
+
+    def token_list_contains_text(token_list):
+        for t in token_list:
+            if t.type == 'text':
+                return True
+        return False
+
+    def gettext_from_token_list(token_list):
+        """ Process token list into format string, parameters and remainder """
+        format, params, remainder = '', {}, ''
+        # remove any trailing whitespace
+        while token_list[-1].type == 'whitespace':
+            remainder = token_list.pop().raw + remainder
+        for t in token_list:
+            if hasattr(t, 'get_key'): 
+                format += '%%(%s)s' % t.get_key()
+                params[t.get_key()] = t.raw
+            else:
+                format += t.raw
+        return format, params, remainder
+
+    for t in tokens + [Tag('empty', '', '')]:
+        if current_string:
+            # in the middle of building a translatable string
+            if t.is_translatable():
+                current_string.append(t)
+            else:
+                # end of translatable token sequence, check for text content
+                if token_list_contains_text(current_string):
+                    format, params, trailing_whitespace = \
+                        gettext_from_token_list(current_string)
+                    gettext.append(format)
+                    try:
+                        output.append(_(format) % params)
+                    except KeyError:
+                        # translator edited placeholder names? Fallback:
+                        output.append(format % params)
+                    output.append(trailing_whitespace)
+                else:
+                    # should not be translated, raw output only
+                    output.append(''.join([x.raw for x in current_string]))
+                # empty for next time:
+                current_string = []
+                # don't forget current token also:
+                output.append(t.raw)
+        else:
+            # should we start a new translatable string?
+            if t.is_translatable() and t.type != 'whitespace':
+                current_string.append(t)
+            else:
+                output.append(t.raw)             
+
+    if export:
+        if remainder:
+            raise Exception, 'scanner got stuck on: "%s"(...)' % remainder[:10]
+        return gettext
+    else:
+        return ''.join(output)
Add a comment to this file

dbgettext/management/__init__.py

Empty file added.

Add a comment to this file

dbgettext/management/commands/__init__.py

Empty file added.

dbgettext/management/commands/dbgettext_export.py

+from django.conf import settings
+from django.core.management.base import NoArgsCommand, CommandError
+from shutil import rmtree
+import os
+from dbgettext.registry import registry
+from dbgettext.html import html_gettext
+
+def recursive_getattr(obj, attr, default=None, separator='__'):
+    """ Allows getattr(obj, 'related_class__property__subproperty__etc') """
+    try:
+        if attr.find(separator) > 0:
+            bits = attr.split(separator)
+            return recursive_getattr(getattr(obj, bits[0]), 
+                                     separator.join(bits[1:]), default)
+        else:
+            return getattr(obj, attr)
+    except AttributeError:
+        return default
+
+
+def get_field_or_callable_content(obj, attr_name):
+    try:
+        attr = getattr(obj, attr_name)
+    except AttributeError:
+        raise
+
+    if callable(attr):
+        return attr()
+    else:
+        return attr
+
+
+def build_queryset(model, options, queryset=None, trail=[]):
+    if queryset is None:
+        queryset = model.objects.all()
+
+    recursive_criteria = {}
+    for c in options.translate_if:
+        recursive_criteria['__'.join(trail+[c])] = options.translate_if[c]
+    queryset = queryset.filter(**recursive_criteria)
+
+    if options.parent:
+        parent_model = getattr(model,options.parent).field.related.parent_model
+        try:
+            parent_options = registry._registry[parent_model]
+        except:
+            raise Exception, "%s.%s is not registered with dbgettext" \
+                % (model, options.parent)
+        queryset = build_queryset(parent_model, parent_options, 
+                                  queryset, trail+[options.parent])
+
+    return queryset
+
+
+def build_path(obj):
+    model = type(obj)
+    options = registry._registry[model]
+    if options.parent:
+        path = build_path(getattr(obj, options.parent))
+    else:
+        path = os.path.join(model._meta.app_label, model._meta.module_name)
+    return os.path.join(path, options.get_path_identifier(obj))
+
+
+class Command(NoArgsCommand):
+    path = getattr(settings, 'DBGETTEXT_PATH', 'locale/')
+    root = getattr(settings, 'DBGETTEXT_ROOT', 'dbgettext')
+
+    def handle_noargs(self, **options):
+        if not os.path.exists(self.path):
+            raise CommandError('This command must be run from the project '
+                               'root directory, and the %s '
+                               '(settings.DBGETTEXT_PATH) directory must '
+                               'exist.' % self.path)
+        self.gettext()
+
+    help = ('Extract translatable strings from models in database '
+            'and store in static files for makemessages to pick up.')
+
+    def gettext(self):
+        """ Export translatable strings from models into static files """
+
+        def write(file, string):
+            print file.name, ':', string
+            string = string.replace('"','\\"') # prevent """"
+            string = string.encode('utf8')
+            file.write(u'gettext("""%s""")\n' % string)
+
+        root = os.path.join(self.path, self.root)
+
+        # remove any old files
+        if os.path.exists(root):
+            rmtree(root) 
+
+        # for each registered model:
+        for model, options in registry._registry.items():
+            for obj in build_queryset(model, options):
+                path = os.path.join(root, build_path(obj))
+                if not os.path.exists(path): 
+                    os.makedirs(path)
+                # for each translatable attribute:
+                for attr_name in options.attributes:
+                    # write contents to <attr_name>.py
+                    attr = get_field_or_callable_content(obj, attr_name)
+                    if attr:
+                        f = open(os.path.join(path, '%s.py' % attr_name), 'w')
+                        write(f, attr)
+                        f.close()
+
+                for attr_name in options.html_attributes:
+                    f = open(os.path.join(path, '%s.py' % attr_name), 'w')
+                    for s in html_gettext(obj, attr_name, export=True):
+                        write(f, s)
+                    f.close()                    

dbgettext/models.py

+class Options(object):
+    """
+    Encapsulates dbgettext options for a given model 
+
+    - attributes: 
+        tuple of names of fields/callables to be translated
+    - html_attributes: 
+        tuple of names of fields/callables with HTML content which should have 
+        translatable content extracted (should not be listed in attributes)
+    - translate_if:
+        dictionary used to filter() queryset 
+    - get_path_identifier:
+        function returning string used to identify object in path to exported 
+        content (given an object)
+    - parent:
+        name of foreign key to parent model, if registered. Affects:
+        - path (path_identifier appended onto parent path)
+        - queryset (object only translated if parent is)
+    - custom_lexicon_rules
+        list of extra custom rules ((regexp, function) tuples) to be applied
+        when parsing HTML -- see html.py
+    - custom_lexicon:
+        complete list of rules ((regexp, function) tuples) for parsing HTML 
+         -- see html.py
+
+    """
+
+    attributes = ()
+    html_attributes = ()
+    translate_if = {}
+    parent = None
+    
+    def get_path_identifier(self, obj):
+        return '%s_%d' % (obj._meta.object_name, obj.id)

dbgettext/registry.py

+from django.db.models.base import ModelBase
+from models import Options
+
+# Registration code based on django.contrib.admin.sites
+
+class AlreadyRegistered(Exception):
+    pass
+
+class NotRegistered(Exception):
+    pass
+
+class Registry(object):
+    """
+    A Registry object is used to register() models for dbgettext exporting,
+    together with their associated options.
+    """
+
+    def __init__(self):
+        self._registry = {} # model_class class -> Options subclass
+
+    def register(self, model_or_iterable, options_class, **options):
+        """
+        Registers the given model(s) with the given admin class.
+
+        The model(s) should be Model classes, not instances.
+
+        If a model is already registered, this will raise AlreadyRegistered.
+        """
+
+        if isinstance(model_or_iterable, ModelBase):
+            model_or_iterable = [model_or_iterable]
+        for model in model_or_iterable:
+            if model in self._registry:
+                raise AlreadyRegistered(
+                    'The model %s is already registered' % model.__name__)
+
+            self._registry[model] = options_class() # instantiated
+
+    def unregister(self, model_or_iterable):
+        """
+        Unregisters the given model(s).
+
+        If a model isn't already registered, this will raise NotRegistered.
+        """
+        if isinstance(model_or_iterable, ModelBase):
+            model_or_iterable = [model_or_iterable]
+        for model in model_or_iterable:
+            if model not in self._registry:
+                raise NotRegistered(
+                    'The model %s is not registered' % model.__name__)
+            del self._registry[model]
+
+
+# Module-level Registry instance imported by the other dbgettext modules
+registry = Registry()
+#!/usr/bin/env python
+
+from distutils.core import setup
+
+setup(name='django-dbgettext',
+      version='0.1',
+      description='Translate Django models by extracting data for gettext',
+      author='Simon Meers',
+      author_email='simon@simonmeers.com',
+      url='http://bitbucket.org/drmeers/django-dbgettext/wiki',
+      packages=['dbgettext', 'dbgettext.management', 
+                'dbgettext.management.commands'],
+     )
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.