Commits

Dmitri Krasnenko committed dbb7ca2

Search & message handling added

Comments (0)

Files changed (25)

haditbefore/search_indexes.py

+import search
+from search.core import porter_stemmer
+from haditbefore.models import Problem
+
+__author__ = 'dkrasnenko'
+
+search.register(Problem, 'description', indexer=porter_stemmer)
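
A quick usage sketch (not part of the diff): the index registered above can be queried through the search helper defined in search/core.py further down; the query string and result handling here are illustrative assumptions.

    from search.core import search
    from haditbefore.models import Problem

    # search() returns a RelationIndexQuery; slicing and iteration resolve the
    # matching primary keys back to Problem instances.
    for problem in search(Problem, 'database timeout')[:10]:
        print problem.id, problem.description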

haditbefore/services.py

-from django.http import HttpResponse
+import logging
+from django.contrib.auth.models import User
+from django.shortcuts import get_object_or_404
+from django.http import HttpResponse, Http404
 from google.appengine.api import xmpp
+from haditbefore.models import Problem
+from search.core import search
 
 
 def handle_message(request):
+    sender = Sender(request.POST)
     message = xmpp.Message(request.POST)
-    message.reply("")
-    return HttpResponse("")
+
+    #Get similar problems
+    similar_problems = get_similar_problem_list(request, message.body)
+
+    #Save new problem
+    problem = Problem(description=message.body, reported_by=get_object_or_404(User, username=sender.username))
+    problem.save()
+
+    #Reply
+    message.reply("Thanks for reporting the error. You can add more details here: http://%s/admin/haditbefore/problem/%d/\n%s" % (get_current_site(request), problem.id, similar_problems))
+
+    return HttpResponse()
+
+def handle_available(request):
+    #Handle GTalk 'user available' notification
+    logging.info(request.META)
+    sender = Sender(request.POST)
+    handle_online_status(request, sender)
+    return HttpResponse()
+
+def register_unknown_user(fxn):
+    def inner (request, sender):
+        try:
+            return fxn(request, sender)
+        except Http404:
+            try:
+                password = sender.username
+
+                # Create user
+                user = User.objects.create_user(
+                        sender.username,
+                        sender.email,
+                        password
+                )
+
+                logging.info(
+                        "User %s has been successfully created." % user.username
+                )
+
+                #Notify new user
+                xmpp.send_message(sender.email, "Congrats! You are in and your user name and password both are %s. You can change your password here: http://%s/admin/password_change/" % (sender.username, request.META['SERVER_NAME']))
+            except:
+                logging.error(
+                        "Unable to create user %s." % sender.username
+                )
+
+                #Notify the sender
+                xmpp.send_message(sender.email, "Sorry, user %s can't be added. Try again later.")
+
+        return HttpResponse()
+
+    return inner
+
+@register_unknown_user
+def handle_online_status(request, sender):
+    user = get_object_or_404(User, username=sender.username, email=sender.email)
+    logging.info("User %s came online" % user.username)
+    return HttpResponse()
+
+def get_current_site(request):
+    return request.META['SERVER_NAME']
+
+def get_similar_problem_list(request, description):
+    site = get_current_site(request)
+    similar_problems = search(Problem, description)
+    logging.info("Found %d similar problem(s)" % len(similar_problems))
+
+    if similar_problems:
+        return "There are similar problems below\n:%s" % ("\n".join(["User %s had it before: http://%s/admin/haditbefore/problem/%d" % (problem.reported_by.email, site, problem.id) for problem in similar_problems]))
+    else:
+        return "There are no similar problems found."
+
+class Sender(object):
+    def __init__(self, vars):
+        self.__email = vars['from'].split('/')[0]
+        self.__user_session_id = vars['from']
+        self.__username = self.__email.split('@')[0]
+
+    @property
+    def email(self):
+        return self.__email
+
+    @property
+    def user_session_id(self):
+        return self.__user_session_id
+
+    @property
+    def username(self):
+        return self.__username
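
For reference, a small sketch (hypothetical JID, not part of this commit) of how Sender splits the XMPP 'from' field delivered by App Engine:

    sender = Sender({'from': 'alice@example.com/Talk.v104'})
    print sender.email            # alice@example.com
    print sender.username         # alice
    print sender.user_session_id  # alice@example.com/Talk.v104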

haditbefore/templates/view_problems.html

+{% extends 'admin/base_site.html' %}
+
+{% block content %}
+<p>
+<dl>
+{% for problem in problems %}
+<dt><h1>Problem {{ problem.id }}:</h1>
+<dd><h2>{{ problem.description }}</h2></dd>
+<dd>Reported by:<br/>{{ problem.reported_by }}</dd>
+<dd>Framework:<br/>{{ problem.framework }}</dd>
+<dd>Programming language:<br/>{{ problem.programming_language }}</dd>
+{% endfor %}
+</dl>
+{% endblock content %}
-from dbindexer import autodiscover
-autodiscover()
+import search
+import dbindexer
+
+dbindexer.autodiscover()
+
+search.autodiscover()

search/__init__.py

+from search.core import default_splitter, site_language, SearchManager, \
+    install_index_model
+from autoload import autodiscover as auto_discover
+
+def autodiscover():
+    auto_discover('search_indexes')
+
+def register(model, fields_to_index, search_index='search_index',
+    indexer=None, splitter=default_splitter, relation_index=True, integrate='*',
+    filters={}, language=site_language, **kwargs):
+
+    """
+    Add a search manager to the model.
+    """
+
+    if not hasattr(model, '_meta'):
+        raise AttributeError('The model being registered must derive from Model.')
+
+    if hasattr(model, search_index):
+        raise AttributeError('The model being registered already defines a'
+            ' property called %s.' % search_index)
+
+    model.add_to_class(search_index, SearchManager(fields_to_index, indexer,
+        splitter, relation_index, integrate, filters, language, **kwargs))
+
+    install_index_model(model)

search/backends/__init__.py

Empty file added.

search/backends/gae_background_tasks.py

+from django.conf import settings
+from django.db import models
+from google.appengine.ext import deferred
+
+default_search_queue = getattr(settings, 'DEFAULT_SEARCH_QUEUE', 'default')
+
+def update_relation_index(search_manager, parent_pk, delete):
+    # pass only the field / model names to the background task to transfer less
+    # data
+    app_label = search_manager.model._meta.app_label
+    object_name = search_manager.model._meta.object_name
+    deferred.defer(update, app_label, object_name, search_manager.name,
+        parent_pk, delete, _queue=default_search_queue)
+
+def update(app_label, object_name, manager_name, parent_pk, delete):
+    model = models.get_model(app_label, object_name)
+    manager = getattr(model, manager_name)
+    manager.update_relation_index(parent_pk, delete)
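
This deferred backend only runs when it is selected in settings; a hedged settings.py sketch (the queue name is an assumption and is not defined in this commit):

    # load_backend() in search/core.py falls back to
    # 'search.backends.immediate_update' when SEARCH_BACKEND is not set.
    SEARCH_BACKEND = 'search.backends.gae_background_tasks'
    # Task queue passed to deferred.defer(); defaults to 'default'.
    DEFAULT_SEARCH_QUEUE = 'search-indexing'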

search/backends/immediate_update.py

+def update_relation_index(search_manager, parent_pk, delete):
+    search_manager.update_relation_index(parent_pk, delete)

search/core.py

+from django.conf import settings
+from django.core.exceptions import ObjectDoesNotExist
+from django.db import models
+from django.db.models import signals
+from djangotoolbox.fields import ListField
+from djangotoolbox.utils import getattr_by_path
+from copy import deepcopy
+import re
+import string
+
+_PUNCTUATION_REGEX = re.compile(
+    '[' + re.escape(string.punctuation.replace('-', '').replace(
+        '_', '').replace('#', '')) + ']')
+_PUNCTUATION_SEARCH_REGEX = re.compile(
+    '[' + re.escape(string.punctuation.replace('_', '').replace(
+        '#', '')) + ']')
+
+# Various base indexers
+def startswith(words, indexing, **kwargs):
+    """Allows for word prefix search."""
+    if not indexing:
+        # In search mode we simply match search terms exactly
+        return words
+    # In indexing mode we add all prefixes ('h', 'he', ..., 'hello')
+    result = []
+    for word in words:
+        result.extend([word[:count].strip(u'-')
+                       for count in range(1, len(word)+1)])
+    return result
+
+def porter_stemmer(words, language, **kwargs):
+    """Porter-stemmer in various languages."""
+    languages = [language,]
+    if '-' in language:
+        languages.append(language.split('-')[0])
+
+    # Fall back to English
+    languages.append('en')
+
+    # Find a stemmer for this language
+    for language in languages:
+        try:
+            stem = __import__('search.porter_stemmers.%s' % language,
+                                 {}, {}, ['']).stem
+        except:
+            continue
+        break
+
+    result = []
+    for word in words:
+        result.append(stem(word))
+    return result
+
+stop_words = {
+    'en': set(('a', 'an', 'and', 'or', 'the', 'these', 'those', 'whose', 'to')),
+    'de': set(('ein', 'eine', 'eines', 'einer', 'einem', 'einen', 'den',
+               'der', 'die', 'das', 'dieser', 'dieses', 'diese', 'diesen',
+               'deren', 'und', 'oder'))
+}
+
+def get_stop_words(language):
+    if language not in stop_words and '-' in language:
+        language = language.split('-', 1)[0]
+    return stop_words.get(language, set())
+
+def non_stop(words, indexing, language, **kwargs):
+    """Removes stop words from search query."""
+    if indexing:
+        return words
+    return list(set(words) - get_stop_words(language))
+
+def porter_stemmer_non_stop(words, **kwargs):
+    """Combines porter_stemmer with non_stop."""
+    return porter_stemmer(non_stop(words, **kwargs), **kwargs)
+
+# Language handler
+def site_language(instance, **kwargs):
+    """The default language handler tries to determine the language from
+    fields in the model instance."""
+
+    # Check if there's a language attribute
+    if hasattr(instance, 'language'):
+        return instance.language
+    if hasattr(instance, 'lang'):
+        return instance.lang
+
+    # Fall back to default language
+    return settings.LANGUAGE_CODE
+
+def default_splitter(text, indexing=False, **kwargs):
+    """
+    Returns a list of keywords contained in the given text. All characters
+    other than letters, numbers and '_' act as split characters. The
+    character '-' is a special case: two words separated by '-' create an
+    additional keyword consisting of both words without separation (see
+    examples).
+
+    Examples:
+    - text='word1/word2 word3'
+      returns ['word1', 'word2', 'word3']
+    - text='word1/word2-word3'
+      returns ['word1', 'word2', 'word3', 'word2word3']
+    """
+    if not text:
+        return []
+    if not indexing:
+        return _PUNCTUATION_SEARCH_REGEX.sub(u' ', text.lower()).split()
+    keywords = []
+    for word in set(_PUNCTUATION_REGEX.sub(u' ', text.lower()).split()):
+        if not word:
+            continue
+        if '-' not in word:
+            keywords.append(word)
+        else:
+            keywords.extend(get_word_combinations(word))
+    return keywords
+
+def get_word_combinations(word):
+    """
+    'one-two-three'
+    =>
+    ['one', 'two', 'three', 'onetwo', 'twothree', 'onetwothree']
+    """
+    permutations = []
+    parts = [part for part in word.split(u'-') if part]
+    for count in range(1, len(parts) + 1):
+        for index in range(len(parts) - count + 1):
+            permutations.append(u''.join(parts[index:index+count]))
+    return permutations
+
+class DictEmu(object):
+    def __init__(self, data):
+        self.data = data
+    def __getitem__(self, key):
+        return getattr(self.data, key)
+
+# IndexField is a (String)ListField storing indexed fields of a model_instance
+class IndexField(ListField):
+    def __init__(self, search_manager, *args, **kwargs):
+        self.search_manager = search_manager
+        kwargs['item_field'] = models.CharField(max_length=500)
+        kwargs['editable'] = False
+        super(IndexField, self).__init__(*args, **kwargs)
+
+    def pre_save(self, model_instance, add):
+        if self.search_manager.filters and not \
+                self.search_manager.should_index(DictEmu(model_instance)):
+            return []
+
+        language = self.search_manager.language
+        if callable(language):
+            language = language(model_instance, property=self)
+
+        index = []
+        for field_name in self.search_manager.fields_to_index:
+            values = getattr_by_path(model_instance, field_name, None)
+            if not values:
+                values = ()
+            elif not isinstance(values, (list, tuple)):
+                values = (values,)
+            for value in values:
+                index.extend(self.search_manager.splitter(value, indexing=True,
+                    language=language))
+        if self.search_manager.indexer:
+            index = self.search_manager.indexer(index, indexing=True,
+                language=language)
+        # Sort index to make debugging easier
+        setattr(model_instance, self.search_manager.search_list_field_name,
+            sorted(set(index)))
+        return index
+
+class SearchManager(models.Manager):
+    """
+    Simple full-text manager adding a search function.
+
+    If "relation_index" is True the index will be stored in a separate entity.
+
+    With "integrate" you can add fields to your relation index,
+    so they can be searched, too.
+
+    With "filters" you can specify when a values index should be created.
+    """
+    def __init__(self, fields_to_index, indexer=None, splitter=default_splitter,
+            relation_index=True, integrate='*', filters={},
+            language=site_language, **kwargs):
+        # integrate should be specified when using the relation index; otherwise
+        # we duplicate the amount of data in the datastore and the relation
+        # index makes no sense anymore
+        # TODO: filters has to be extended (maybe a function) to allow Django's
+        # QuerySet methods like exclude
+        if integrate is None:
+            integrate = ()
+        if integrate == '*' and not relation_index:
+            integrate = ()
+        if isinstance(fields_to_index, basestring):
+            fields_to_index = (fields_to_index,)
+        self.fields_to_index = fields_to_index
+        if isinstance(integrate, basestring):
+            integrate = (integrate,)
+        self.filters = filters
+        self.integrate = integrate
+        self.splitter = splitter
+        self.indexer = indexer
+        self.language = language
+        self.relation_index = relation_index
+        if len(fields_to_index) == 0:
+            raise ValueError('No fields specified for index!')
+        # search_list_field_name is only set when no relation index is used,
+        # i.e. for relation_index=False or for the relation_index_model itself
+        self.search_list_field_name = ''
+        super(SearchManager, self).__init__(**kwargs)
+
+    def contribute_to_class(self, model, name):
+        super(SearchManager, self).contribute_to_class(model, name)
+        # set default_manager to None such that the default_manager will be set
+        # to 'objects' via the class-prepared signal calling
+        # ensure_default_manager
+#        setattr(model, '_default_manager', None)
+        self.name = name
+        # add IndexField to the model if we do not use the relation_index
+        if not self.relation_index:
+            self.search_list_field_name = "%s_search_list_field" %name
+            # Add field to class dynamically
+            setattr(model, self.search_list_field_name, IndexField(self))
+            getattr(model, self.search_list_field_name).contribute_to_class(
+                model, self.search_list_field_name)
+
+    def filter(self, values):
+        """
+        Returns a query for the given values (creates '=' filters for the
+        IndexField). Additional filters can be applied afterwards via chaining.
+        """
+        if not isinstance(values, (tuple, list)):
+            values = (values,)
+        filtered = self.model.objects.all()
+        for value in set(values):
+            filter = {self.search_list_field_name:value}
+            filtered = filtered.filter(**filter)
+        return filtered
+
+    def _search(self, query, indexer=None, splitter=None,
+            language=settings.LANGUAGE_CODE):
+        if not splitter:
+            splitter = default_splitter
+        words = splitter(query, indexing=False, language=language)
+        if indexer:
+            words = indexer(words, indexing=False, language=language)
+        # Optimize query
+        words = set(words)
+        if len(words) >= 4:
+            words -= get_stop_words(language)
+        # Don't allow empty queries
+        if not words and query:
+            # This query will never find anything
+            return self.filter(()).filter({self.search_list_field_name:' '})
+        return self.filter(sorted(words))
+
+    def should_index(self, values):
+        # Check if filter doesn't match
+        if not values:
+            return False
+        for filter, value in self.filters.items():
+            attr, op = filter, 'exact'
+            if '__' in filter:
+                attr, op = filter.rsplit('__', 1)
+            op = op.lower()
+            if (op == 'exact' and values[attr] != value or
+#                    op == '!=' and values[attr] == value or
+                    op == 'in' and values[attr] not in value or
+                    op == 'lt' and values[attr] >= value or
+                    op == 'lte' and values[attr] > value or
+                    op == 'gt' and values[attr] <= value or
+                    op == 'gte' and values[attr] < value):
+                return False
+            elif op not in ('exact', 'in', 'lt', 'lte', 'gte', 'gt'):
+                raise ValueError('Invalid search index filter: %s %s' % (filter, value))
+        return True
+
+#    @commit_locked
+    def update_relation_index(self, parent_pk, delete=False):
+        relation_index_model = self._relation_index_model
+        try:
+            index = relation_index_model.objects.get(pk=parent_pk)
+        except ObjectDoesNotExist:
+            index = None
+
+        if not delete:
+            try:
+                parent = self.model.objects.get(pk=parent_pk)
+            except ObjectDoesNotExist:
+                parent = None
+
+            values = None
+            if parent:
+                values = self.get_index_values(parent)
+
+        # Remove the index if it's not needed anymore
+        if delete or not self.should_index(values):
+            if index:
+                index.delete()
+            return
+
+        # Update/create index
+        if not index:
+            index = relation_index_model(pk=parent_pk, **values)
+
+        # This guarantees that we also set virtual @properties
+        for key, value in values.items():
+            setattr(index, key, value)
+
+        index.save()
+
+    def create_index_model(self):
+        attrs = dict(__module__=self.__module__)
+        # By default we integrate everything when using the relation index;
+        # the manager will add the IndexField to the relation index automatically
+        if self.integrate == ('*',):
+            self.integrate = tuple(field.name
+                                   for field in self.model._meta.fields
+                                   if not isinstance(field, IndexField))
+
+        for field_name in self.integrate:
+            field = self.model._meta.get_field_by_name(field_name)[0]
+            field = deepcopy(field)
+            attrs[field_name] = field
+            if isinstance(field, models.ForeignKey):
+                attrs[field_name].rel.related_name = '_sidx_%s_%s_%s_set_' % (
+                    self.model._meta.object_name.lower(),
+                    self.name, field_name,
+                )
+
+        owner = self
+        def __init__(self, *args, **kwargs):
+            # Save some space: don't copy the whole indexed text into the
+            # relation index field unless the field gets integrated.
+            field_names = [field.name for field in self._meta.fields]
+            owner_field_names = [field.name
+                                 for field in owner.model._meta.fields]
+            for key, value in kwargs.items():
+                if key in field_names or key not in owner_field_names:
+                    continue
+                setattr(self, key, value)
+                del kwargs[key]
+            models.Model.__init__(self, *args, **kwargs)
+        attrs['__init__'] = __init__
+
+        self._relation_index_model = type(
+            'RelationIndex_%s_%s_%s' % (self.model._meta.app_label,
+                                        self.model._meta.object_name,
+                                        self.name),
+            (models.Model,), attrs)
+        self._relation_index_model.add_to_class(self.name, SearchManager(
+            self.fields_to_index, splitter=self.splitter, indexer=self.indexer,
+            language=self.language, relation_index=False))
+
+    def get_index_values(self, parent):
+        filters = []
+        for filter in self.filters.keys():
+            if '__' in filter:
+                filters.append(filter.rsplit('__')[0])
+            else:
+                filters.append(filter)
+        filters = tuple(filters)
+        values = {}
+        for field_name in set(self.fields_to_index + self.integrate + filters):
+            field = self.model._meta.get_field_by_name(field_name)[0]
+            if isinstance(field, models.ForeignKey):
+                value = field.pre_save(parent, False)
+            else:
+                value = getattr(parent, field_name)
+            if field_name == self.fields_to_index[0] and \
+                    isinstance(value, (list, tuple)):
+                value = sorted(value)
+            if isinstance(field, models.ForeignKey):
+                values[field.column] = value
+            else:
+                values[field_name] = value
+        return values
+
+    def search(self, query, language=settings.LANGUAGE_CODE):
+        if self.relation_index:
+            items = getattr(self._relation_index_model, self.name).search(query,
+                language=language).values('pk')
+            return RelationIndexQuery(self.model, items)
+        return self._search(query, splitter=self.splitter,
+            indexer=self.indexer, language=language)
+
+def load_backend():
+    backend = getattr(settings, 'SEARCH_BACKEND', 'search.backends.immediate_update')
+    import_list = []
+    if '.' in backend:
+        import_list = [backend.rsplit('.', 1)[1]]
+    return __import__(backend, globals(), locals(), import_list)
+
+def post(delete, sender, instance, **kwargs):
+    for counter, manager_name, manager in sender._meta.concrete_managers:
+        if isinstance(manager, SearchManager):
+            if manager.relation_index:
+                backend = load_backend()
+                backend.update_relation_index(manager, instance.pk, delete)
+
+def post_save(sender, instance, **kwargs):
+    post(False, sender, instance, **kwargs)
+
+def post_delete(sender, instance, **kwargs):
+    post(True, sender, instance, **kwargs)
+
+def install_index_model(sender, **kwargs):
+    needs_relation_index = False
+    # what to do for abstract_managers?
+    for counter, manager_name, manager in sender._meta.concrete_managers:
+        if isinstance(manager, SearchManager) and manager.relation_index:
+            manager.create_index_model()
+            needs_relation_index = True
+    if needs_relation_index:
+        signals.post_save.connect(post_save, sender=sender)
+        signals.post_delete.connect(post_delete, sender=sender)
+#signals.class_prepared.connect(install_index_model)
+
+class QueryTraits(object):
+    def __iter__(self):
+        return iter(self[:301])
+
+    def __len__(self):
+        return self.count()
+
+    def get(self, *args, **kwargs):
+        result = self[:1]
+        if result:
+            return result[0]
+        raise ObjectDoesNotExist
+
+class RelationIndexQuery(QueryTraits):
+    """Combines the results of multiple queries by appending the queries in the
+    given order."""
+    def __init__(self, model, query):
+        self.model = model
+        self.query = query
+
+    def order_by(self, *args, **kwargs):
+        self.query = self.query.order_by(*args, **kwargs)
+        return self
+
+    def filter(self, *args, **kwargs):
+        self.query = self.query.filter(*args, **kwargs)
+        return self
+
+    def __getitem__(self, index):
+        pks_slice = index
+        if not isinstance(index, slice):
+            pks_slice = slice(None, index + 1, None)
+
+        pks = [instance.pk if isinstance(instance, models.Model) else instance['pk']
+                for instance in self.query[pks_slice]]
+        if not isinstance(index, slice):
+            return self.model.objects.filter(pk__in=pks)[index]
+        return self.model.objects.filter(pk__in=pks)[pks_slice]
+#        return [item for item in self.model.objects.filter(
+#            pk__in=pks) if item]
+        
+
+    def count(self):
+        return self.query.count()
+
+    # TODO: add keys_only query
+#    def values(self, fields):
+#        pass
+
+def search(model, query, language=settings.LANGUAGE_CODE,
+        search_index='search_index'):
+    return getattr(model, search_index).search(query, language)
+from django import forms
+
+class LiveSearchField(forms.CharField):
+    def __init__(self, src, multiple_values=False, select_first=False,
+                 auto_fill=False, must_match=False, match_contains=True,
+                 **kwargs):
+        attrs = {'src': src}
+        classes = []
+        if multiple_values:
+            classes.append('multiple-values')
+        if select_first:
+            classes.append('select-first')
+        if auto_fill:
+            classes.append('auto-fill')
+        elif match_contains:
+            classes.append('match-contains')
+        if must_match:
+            classes.append('must-match')
+        if classes:
+            attrs['class'] = ' '.join(classes)
+        super(LiveSearchField, self).__init__(
+            widget=forms.TextInput(attrs=attrs), **kwargs)
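
A hedged sketch of how LiveSearchField might be used in a form. The src URL is a made-up example, the import path of LiveSearchField is not shown in this diff, and the field only sets attributes/CSS classes on a TextInput, so the matching JavaScript is assumed to live elsewhere:

    from django import forms

    class ProblemSearchForm(forms.Form):
        # Renders a text input with src='/search/live/' and
        # class="select-first match-contains".
        query = LiveSearchField(src='/search/live/', select_first=True)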

search/models.py

Empty file added.

search/porter_stemmers/__init__.py

Empty file added.

search/porter_stemmers/de.py

+# -*- coding: utf-8 -*-
+#   A Python implementation of the Porter stemmer for German (original at http://snowball.tartarus.org/texts/germanic.html)
+#
+#   Modified/optimized/fixed by Waldemar Kornewald
+#
+#   Author of this version: (c) by kristall 'ät' c-base.org       http://kristall.crew.c-base.org/porter_de.py
+#
+#   The algorithm in (English) prose: http://snowball.tartarus.org/algorithms/german/stemmer.html
+#
+#   Wikipedia on the Porter stemmer: http://de.wikipedia.org/wiki/Porter-Stemmer-Algorithmus
+#
+#   License: this software is released under the BSD License (see http://www.opensource.org/licenses/bsd-license.html).
+#   Original author: (c) by Dr. Martin Porter
+#
+#
+###
+
+#   Anyone working with strings should load this module
+import string
+
+#   The stop list; words in this list are not 'stemmed' if stop=True is passed to the function
+stopliste = (u'aber', u'alle', u'allem', u'allen', u'aller', u'alles', u'als', u'also', u'am', u'an', u'ander', u'andere', u'anderem',
+        u'anderen', u'anderer', u'anderes', u'anderm', u'andern', u'anders', u'auch', u'auf', u'aus', u'bei', u'bin', u'bis', u'bist',
+        u'da', u'damit', u'dann', u'der', u'den', u'des', u'dem', u'die', u'das', u'dass', u'daß', u'derselbe', u'derselben', u'denselben',
+        u'desselben', u'demselben', u'dieselbe', u'dieselben', u'dasselbe', u'dazu', u'dein', u'deine', u'deinem', u'deinen', u'deiner',
+        u'deines', u'denn', u'derer', u'dessen', u'dich', u'dir', u'du', u'dies', u'diese', u'diesem', u'diesen', u'dieser', u'dieses',
+        u'doch', u'dort', u'durch', u'ein', u'eine', u'einem', u'einen', u'einer', u'eines', u'einig', u'einige', u'einigem', u'einigen', 
+        u'einiger', u'einiges', u'einmal', u'er', u'ihn', u'ihm', u'es', u'etwas', u'euer', u'eure', u'eurem', u'euren', u'eurer', u'eures',
+        u'für', u'gegen', u'gewesen', u'hab', u'habe', u'haben', u'hat', u'hatte', u'hatten', u'hier', u'hin', u'hinter', u'ich', u'mich',
+        u'mir', u'ihr', u'ihre', u'ihrem', u'ihren', u'ihrer', u'ihres', u'euch', u'im', u'in', u'indem', u'ins', u'ist', u'jede', u'jedem',
+        u'jeden', u'jeder', u'jedes', u'jene', u'jenem', u'jenen', u'jener', u'jenes', u'jetzt', u'kann', u'kein', u'keine', u'keinem', 
+        u'keinen', u'keiner', u'keines', u'können', u'könnte', u'machen', u'man', u'manche', u'manchem', u'manchen', u'mancher', 
+        u'manches', u'mein', u'meine', u'meinem', u'meinen', u'meiner', u'meines', u'mit', u'muss', u'musste', u'muß', u'mußte', u'nach',
+        u'nicht', u'nichts', u'noch', u'nun', u'nur', u'ob', u'oder', u'ohne', u'sehr', u'sein', u'seine', u'seinem', u'seinen', u'seiner',
+        u'seines', u'selbst', u'sich', u'sie', u'ihnen', u'sind', u'so', u'solche', u'solchem', u'solchen', u'solcher', u'solches', u'soll',
+        u'sollte', u'sondern', u'sonst', u'über', u'um', u'und', u'uns', u'unse', u'unsem', u'unsen', u'unser', u'unses', u'unter', u'viel',
+        u'vom', u'von', u'vor', u'während', u'war', u'waren', u'warst', u'was', u'weg', u'weil', u'weiter', u'welche', u'welchem', 
+        u'welchen', u'welcher', u'welches', u'wenn', u'werde', u'werden', u'wie', u'wieder', u'will', u'wir', u'wird', u'wirst', u'wo',
+        u'wollem', u'wollte', u'würde', u'würden', u'zu', u'zum', u'zur', u'zwar', u'zwischen')
+
+#   The stem function takes a word and tries to shorten it by applying rules. If stop is set to 'True', words in the stop list are not stemmed.
+def stem(wort, stop=True):
+    #   NOTE: for the stemmer, 'y' counts as a vowel.
+    vokale = u'aeiouyäüö'
+    #   NOTE: 'U' and 'Y' count as consonants.
+    konsonanten = u'bcdfghjklmnpqrstvwxzßUY'
+    #   Consonants that may precede an 's' ending.
+    s_endung = u'bdfghklmnrt'
+    #   Consonants that may precede an 'st' ending.
+    st_endung = u'bdfghklmnt'
+    #   For r1 & r2 see http://snowball.tartarus.org/texts/r1r2.html; p1 & p2 are the start positions of r1 & r2 in the string
+    r1 = u''
+    p1 = 0
+    r2 = u''
+    p2 = 0
+    #   Word stems are written in lower case
+    wort = wort.lower()
+    #   If 'stop' is set and the word is in the stop list, return 'wort'
+    if stop == True and wort in stopliste:
+        return end_stemming(wort.replace(u'ß', u'ss'))
+    # Replace all 'ß' with 'ss'
+    wort = wort.replace(u'ß', u'ss')
+    #   'u' and 'y' that need to be protected are replaced with 'U' and 'Y'
+    for e in map(None, wort, range(len(wort))):
+        if e[1] == 0: continue
+        if u'u' in e:
+            try:
+                if ((wort[(e[1]-1)] in vokale) and (wort[(e[1]+1)] in vokale)): wort = wort[:e[1]] + u'U' + wort[(e[1]+1):]
+            except : pass
+        if  u'y' in e:
+            try:
+                if ((wort[(e[1]-1)] in vokale) and (wort[(e[1]+1)] in vokale)): wort = wort[:e[1]] + u'Y' + wort[(e[1]+1):]
+            except: pass
+    #   r1, r2, p1 & p2 are assigned their values
+    try:
+        Bedingung = False
+        for e in map(None, wort, range(len(wort))):
+            if e[0] in vokale: Bedingung = True
+            if ((e[0] in konsonanten) and (Bedingung)):
+                p1 = e[1] + 1 
+                r1 = wort[p1:]
+                break
+        Bedingung = False
+        for e in map(None, r1, range(len(r1))):
+            if e[0] in vokale: Bedingung = True
+            if ((e[0] in konsonanten) and (Bedingung)):
+                p2 = e[1] + 1 
+                r2 = r1[p2:]
+                break
+        if ((p1 < 3)and(p1 > 0)):
+            p1 = 3
+            r1 = wort[p1:]
+        if p1 == 0:
+            return end_stemming(wort)
+    except: pass
+    #   Steps 1 through 3 d) 'stem' the given word.
+    #   Step 1
+    eSuffixe_1 = [u'e', u'em', u'en', u'ern', u'er', u'es']
+    eSonst_1 = [u's']
+    try:
+        for e in eSuffixe_1:
+            if e in r1[-(len(e)):]:
+                wort = wort[:-(len(e))]
+                r1 = r1[:-(len(e))]
+                r2 = r2[:-(len(e))]
+                break
+        else:
+            if r1[-1] in eSonst_1:
+                if wort[-2] in s_endung:
+                    wort = wort[:-1]
+                    r1 = r1[:-1]
+                    r2 = r2[:-1]
+    except: pass
+    #   Step 2
+    eSuffixe_2 = [u'est', u'er', u'en']
+    eSonst_2 = [u'st']
+    try:
+        for e in eSuffixe_2:
+            if e in r1[-len(e):]:
+                wort = wort[:-len(e)]
+                r1 = r1[:-len(e)]
+                r2 = r2[:-len(e)]
+                break
+        else:
+            if r1[-2:] in eSonst_2:             
+                if wort[-3] in st_endung:
+                    if len(wort) > 5:
+                        wort = wort[:-2]
+                        r1 = r1[:-2]
+                        r2 = r2[:-2]
+    except:pass
+    #   Step 3 a)
+    dSuffixe_1 = [u'end', u'ung']
+    try:
+        for e in dSuffixe_1:
+            if e in r2[-(len(e)):]:
+                if u'ig' in r2[-(len(e)+2):-(len(e))]:
+                    if u'e' in wort[-(len(e)+3)]:
+                        wort = wort[:-(len(e))]
+                        r1 = r1[:-(len(e))]
+                        r2 = r2[:-(len(e))]
+                        break
+                    else:
+                        wort = wort[:-(len(e)+2)]
+                        r2 = r2[:-(len(e)+2)]
+                        r1 = r1[:-(len(e)+2)]
+                        break
+                else:
+                    wort = wort[:-(len(e))]
+                    r2 = r2[:-(len(e))]
+                    r1 = r1[:-(len(e))]
+                return end_stemming(wort)
+    except: pass
+    #   Step 3 b)
+    dSuffixe_2 = [u'ig', u'ik', u'isch']
+    try:
+        for e in dSuffixe_2:
+            if e in r2[-(len(e)):]:
+                if ((u'e' in wort[-(len(e)+1)])):
+                    pass
+                else:
+                    wort = wort[:-(len(e))]
+                    r2 = r2[:-(len(e))]
+                    r1 = r1[:-(len(e))]
+                    break
+    except: pass
+    #   Step 3 c)
+    dSuffixe_3 = [u'lich', u'heit']
+    sonder_1 = [u'er', u'en']
+    try: 
+        for e in dSuffixe_3:
+            if e in r2[-(len(e)):]:
+                for i in sonder_1:
+                    if i in r1[-(len(e)+len(i)):-(len(e))]:
+                        wort = wort[:-(len(e)+len(i))]
+                        r1 = r1[:-(len(e)+len(i))]
+                        r2 = r2[:-(len(e)+len(i))]
+                        break
+                else:
+                    wort = wort[:-(len(e))]
+                    r1 = r1[:-(len(e))]
+                    r2 = r2[:-(len(e))]
+                    break
+                        
+    except: pass
+    #   Step 3 d)
+    dSuffixe_4 = [u'keit']
+    sonder_2 = [u'lich', u'ig']
+    try:
+        for e in dSuffixe_4:
+            if e in r2[-(len(e)):]:
+                for i in sonder_2:
+                    if i in r2[-(len(e)+len(i)):-(len(e))]:
+                        wort = wort[:-(len(e)+len(i))]
+                        break
+                else:
+                    wort = wort[:-(len(e))]
+                                    
+    except: pass
+    return end_stemming(wort)
+
+#  end_stemming converts u'ä', u'ö', u'ü' into the base vowel and lowercases 'U' and 'Y'.
+def end_stemming(wort):
+    return wort.replace(u'ä', u'a').replace(u'ö', u'o').replace(
+        u'ü', u'u').replace(u'U', u'u').replace(u'Y', u'y')

search/porter_stemmers/en.py

+# Copyright (c) 2008 Michael Dirolf (mike at dirolf dot com)
+
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""pyporter2: An implementation of the Porter2 stemming algorithm.
+
+See http://snowball.tartarus.org/algorithms/english/stemmer.html"""
+import unittest, re
+
+regexp = re.compile(r"[^aeiouy]*[aeiouy]+[^aeiouy](\w*)")
+def get_r1(word):
+    # exceptional forms
+    if word.startswith('gener') or word.startswith('arsen'):
+        return 5
+    if word.startswith('commun'):
+        return 6
+
+    # normal form
+    match = regexp.match(word)
+    if match:
+        return match.start(1)
+    return len(word)
+
+def get_r2(word):
+    match = regexp.match(word, get_r1(word))
+    if match:
+        return match.start(1)
+    return len(word)
+
+def ends_with_short_syllable(word):
+    if len(word) == 2:
+        if re.match(r"^[aeiouy][^aeiouy]$", word):
+            return True
+    if re.match(r".*[^aeiouy][aeiouy][^aeiouywxY]$", word):
+        return True
+    return False
+
+def is_short_word(word):
+    if ends_with_short_syllable(word):
+        if get_r1(word) == len(word):
+            return True
+    return False
+
+def remove_initial_apostrophe(word):
+    if word.startswith("'"):
+        return word[1:]
+    return word
+
+def capitalize_consonant_ys(word):
+    if word.startswith('y'):
+        word = 'Y' + word[1:]
+    return re.sub(r"([aeiouy])y", '\g<1>Y', word)
+
+def step_0(word):
+    if word.endswith("'s'"):
+        return word[:-3]
+    if word.endswith("'s"):
+        return word[:-2]
+    if word.endswith("'"):
+        return word[:-1]
+    return word
+
+def step_1a(word):
+    if word.endswith('sses'):
+        return word[:-4] + 'ss'
+    if word.endswith('ied') or word.endswith('ies'):
+        if len(word) > 4:
+            return word[:-3] + 'i'
+        else:
+            return word[:-3] + 'ie'
+    if word.endswith('us') or word.endswith('ss'):
+        return word
+    if word.endswith('s'):
+        preceding = word[:-1]
+        if re.search(r"[aeiouy].", preceding):
+            return preceding
+        return word
+    return word
+
+def step_1b(word, r1):
+    if word.endswith('eedly'):
+        if len(word) - 5 >= r1:
+            return word[:-3]
+        return word
+    if word.endswith('eed'):
+        if len(word) - 3 >= r1:
+            return word[:-1]
+        return word
+
+    def ends_with_double(word):
+        doubles = ['bb', 'dd', 'ff', 'gg', 'mm', 'nn', 'pp', 'rr', 'tt']
+        for double in doubles:
+            if word.endswith(double):
+                return True
+        return False
+
+    def step_1b_helper(word):
+        if word.endswith('at') or word.endswith('bl') or word.endswith('iz'):
+            return word + 'e'
+        if ends_with_double(word):
+            return word[:-1]
+        if is_short_word(word):
+            return word + 'e'
+        return word
+
+    suffixes = ['ed', 'edly', 'ing', 'ingly']
+    for suffix in suffixes:
+        if word.endswith(suffix):
+            preceding = word[:-len(suffix)]
+            if re.search(r"[aeiouy]", preceding):
+                return step_1b_helper(preceding)
+            return word
+
+    return word
+
+def step_1c(word):
+    if word.endswith('y') or word.endswith('Y'):
+        if word[-2] not in 'aeiouy':
+            if len(word) > 2:
+                return word[:-1] + 'i'
+    return word
+
+def step_2(word, r1):
+    def step_2_helper(end, repl, prev):
+        if word.endswith(end):
+            if len(word) - len(end) >= r1:
+                if prev == []:
+                    return word[:-len(end)] + repl
+                for p in prev:
+                    if word[:-len(end)].endswith(p):
+                        return word[:-len(end)] + repl
+            return word
+        return None
+
+    triples = [('ization', 'ize', []),
+               ('ational', 'ate', []),
+               ('fulness', 'ful', []),
+               ('ousness', 'ous', []),
+               ('iveness', 'ive', []),
+               ('tional', 'tion', []),
+               ('biliti', 'ble', []),
+               ('lessli', 'less', []),
+               ('entli', 'ent', []),
+               ('ation', 'ate', []),
+               ('alism', 'al', []),
+               ('aliti', 'al', []),
+               ('ousli', 'ous', []),
+               ('iviti', 'ive', []),
+               ('fulli', 'ful', []),
+               ('enci', 'ence', []),
+               ('anci', 'ance', []),
+               ('abli', 'able', []),
+               ('izer', 'ize', []),
+               ('ator', 'ate', []),
+               ('alli', 'al', []),
+               ('bli', 'ble', []),
+               ('ogi', 'og', ['l']),
+               ('li', '', ['c', 'd', 'e', 'g', 'h', 'k', 'm', 'n', 'r', 't'])]
+
+    for trip in triples:
+        attempt = step_2_helper(trip[0], trip[1], trip[2])
+        if attempt:
+            return attempt
+
+    return word
+
+def step_3(word, r1, r2):
+    def step_3_helper(end, repl, r2_necessary):
+        if word.endswith(end):
+            if len(word) - len(end) >= r1:
+                if not r2_necessary:
+                    return word[:-len(end)] + repl
+                else:
+                    if len(word) - len(end) >= r2:
+                        return word[:-len(end)] + repl
+            return word
+        return None
+
+    triples = [('ational', 'ate', False),
+               ('tional', 'tion', False),
+               ('alize', 'al', False),
+               ('icate', 'ic', False),
+               ('iciti', 'ic', False),
+               ('ative', '', True),
+               ('ical', 'ic', False),
+               ('ness', '', False),
+               ('ful', '', False)]
+
+    for trip in triples:
+        attempt = step_3_helper(trip[0], trip[1], trip[2])
+        if attempt:
+            return attempt
+
+    return word
+
+def step_4(word, r2):
+    delete_list = ['al', 'ance', 'ence', 'er', 'ic', 'able', 'ible', 'ant', 'ement', 'ment', 'ent', 'ism', 'ate', 'iti', 'ous', 'ive', 'ize']
+
+    for end in delete_list:
+        if word.endswith(end):
+            if len(word) - len(end) >= r2:
+                return word[:-len(end)]
+            return word
+
+    if word.endswith('sion') or word.endswith('tion'):
+        if len(word) - 3 >= r2:
+            return word[:-3]
+
+    return word
+
+def step_5(word, r1, r2):
+    if word.endswith('l'):
+        if len(word) - 1 >= r2 and word[-2] == 'l':
+            return word[:-1]
+        return word
+
+    if word.endswith('e'):
+        if len(word) - 1 >= r2:
+            return word[:-1]
+        if len(word) - 1 >= r1 and not ends_with_short_syllable(word[:-1]):
+            return word[:-1]
+
+    return word
+
+def normalize_ys(word):
+    return word.replace('Y', 'y')
+
+exceptional_forms = {'skis': 'ski',
+                    'skies': 'sky',
+                    'dying': 'die',
+                    'lying': 'lie',
+                    'tying': 'tie',
+                    'idly': 'idl',
+                    'gently': 'gentl',
+                    'ugly': 'ugli',
+                    'early': 'earli',
+                    'only': 'onli',
+                    'singly': 'singl',
+                    'sky': 'sky',
+                    'news': 'news',
+                    'howe': 'howe',
+                    'atlas': 'atlas',
+                    'cosmos': 'cosmos',
+                    'bias': 'bias',
+                    'andes': 'andes'}
+
+exceptional_early_exit_post_1a = ['inning', 'outing', 'canning', 'herring', 'earring', 'proceed', 'exceed', 'succeed']
+
+def stem(word):
+    """The main entry point in the old version of the API."""
+    return Stemmer._stem(word)
+
+def algorithms():
+    """Get a list of the names of the available stemming algorithms.
+
+    The only algorithm currently supported is the "english", or porter2,
+    algorithm.
+    """
+    return ['english']
+
+def version ():
+    """Get the version number of the stemming module.
+
+    This is the version number of the Stemmer module as a whole (not for an
+    individual algorithm).
+    """
+    return '1.0.0'
+
+class Stemmer:
+    """An instance of a stemming algorithm.
+
+    When creating a Stemmer object, there is one required argument:
+    the name of the algorithm to use in the new stemmer. A list of the
+    valid algorithm names may be obtained by calling the algorithms()
+    function in this module. In addition, the appropriate stemming algorithm
+    for a given language may be obtained by using the 2 or 3 letter ISO 639
+    language codes.
+    """
+    max_cache_size = 10000
+
+    def __init__ (self, algorithm, cache_size=None):
+        if algorithm not in ['english', 'eng', 'en']:
+            raise KeyError("Stemming algorithm '%s' not found" % algorithm)
+        if cache_size:
+            self.max_cache_size = cache_size
+
+    def stemWord(self, word):
+        """Stem a word.
+
+        This takes a single argument, word, which should either be a UTF-8
+        encoded string, or a unicode object.
+
+        The result is the stemmed form of the word. If the word supplied
+        was a unicode object, the result will be a unicode object: if the
+        word supplied was a string, the result will be a UTF-8 encoded string.
+        """
+        return Stemmer._stem(word)
+
+    def stemWords(self, words):
+        """Stem a list of words.
+
+        This takes a single argument, words, which must be a sequence,
+        iterator, generator or similar.
+
+        The entries in words should either be UTF-8 encoded strings,
+        or a unicode objects.
+
+        The result is a list of the stemmed forms of the words. If the word
+        supplied was a unicode object, the stemmed form will be a unicode
+        object: if the word supplied was a string, the stemmed form will
+        be a UTF-8 encoded string.
+        """
+        return [self.stemWord(word) for word in words]
+
+    @classmethod
+    def _stem(cls, word):
+        was_unicode = False
+
+        if len(word) <= 2:
+            return word
+
+        if isinstance(word, unicode):
+            was_unicode = True
+            word = word.encode('utf-8')
+
+        word = remove_initial_apostrophe(word)
+
+        # handle some exceptional forms
+        if word in exceptional_forms:
+            return exceptional_forms[word]
+
+        word = capitalize_consonant_ys(word)
+        r1 = get_r1(word)
+        r2 = get_r2(word)
+        word = step_0(word)
+        word = step_1a(word)
+
+        # handle some more exceptional forms
+        if word in exceptional_early_exit_post_1a:
+            return word
+
+        word = step_1b(word, r1)
+        word = step_1c(word)
+        word = step_2(word, r1)
+        word = step_3(word, r1, r2)
+        word = step_4(word, r2)
+        word = step_5(word, r1, r2)
+        word = normalize_ys(word)
+
+        if was_unicode:
+            return word.decode('utf-8')
+        return word
+
+class TestPorter2(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def testModule(self):
+        self.assertEqual(algorithms(), ['english'])
+        self.assertEqual(version(), '1.0.0')
+        self.assertRaises(KeyError, Stemmer, 'porter')
+        self.assertRaises(KeyError, Stemmer, 'random')
+        stemmer = Stemmer('english')
+        stemmer = Stemmer('en')
+        stemmer = Stemmer('eng')
+
+    def testDeprecation(self):
+        self.assertRaises(DeprecationWarning, stem, 'stemming')
+
+    def testGetR1(self):
+        self.assertEqual(get_r1('beautiful'), 5)
+        self.assertEqual(get_r1('beauty'), 5)
+        self.assertEqual(get_r1('beau'), 4)
+        self.assertEqual(get_r1('animadversion'), 2)
+        self.assertEqual(get_r1('sprinkled'), 5)
+        self.assertEqual(get_r1('eucharist'), 3)
+
+        # test exceptional forms
+        self.assertEqual(get_r1('gener'), 5)
+        self.assertEqual(get_r1('generous'), 5)
+        self.assertEqual(get_r1('generousity'), 5)
+        self.assertEqual(get_r1('general'), 5)
+        self.assertEqual(get_r1('generally'), 5)
+        self.assertEqual(get_r1('generality'), 5)
+        self.assertEqual(get_r1('commun'), 6)
+        self.assertEqual(get_r1('communist'), 6)
+        self.assertEqual(get_r1('communal'), 6)
+        self.assertEqual(get_r1('communistic'), 6)
+        self.assertEqual(get_r1('arsen'), 5)
+        self.assertEqual(get_r1('arsenic'), 5)
+        self.assertEqual(get_r1('arsenal'), 5)
+        self.assertEqual(get_r1('arsenality'), 5)
+
+    def testGetR2(self):
+        self.assertEqual(get_r2('beautiful'), 7)
+        self.assertEqual(get_r2('beauty'), 6)
+        self.assertEqual(get_r2('beau'), 4)
+        self.assertEqual(get_r2('animadversion'), 4)
+        self.assertEqual(get_r2('sprinkled'), 9)
+        self.assertEqual(get_r2('eucharist'), 6)
+
+    def testEndsWithShortSyllable(self):
+        self.assertEqual(ends_with_short_syllable(''), False)
+        self.assertEqual(ends_with_short_syllable('rap'), True)
+        self.assertEqual(ends_with_short_syllable('trap'), True)
+        self.assertEqual(ends_with_short_syllable('entrap'), True)
+        self.assertEqual(ends_with_short_syllable('ow'), True)
+        self.assertEqual(ends_with_short_syllable('on'), True)
+        self.assertEqual(ends_with_short_syllable('at'), True)
+        self.assertEqual(ends_with_short_syllable('uproot'), False)
+        self.assertEqual(ends_with_short_syllable('bestow'), False)
+        self.assertEqual(ends_with_short_syllable('disturb'), False)
+
+    def testIsShortWord(self):
+        self.assertEqual(is_short_word(''), False)
+        self.assertEqual(is_short_word('bed'), True)
+        self.assertEqual(is_short_word('shed'), True)
+        self.assertEqual(is_short_word('shred'), True)
+        self.assertEqual(is_short_word('bead'), False)
+        self.assertEqual(is_short_word('embed'), False)
+        self.assertEqual(is_short_word('beds'), False)
+
+    def testRemoveInitialApostrophe(self):
+        self.assertEqual(remove_initial_apostrophe(''), '')
+        self.assertEqual(remove_initial_apostrophe('mike'), 'mike')
+        self.assertEqual(remove_initial_apostrophe('\'mike'), 'mike')
+        self.assertEqual(remove_initial_apostrophe('\'mi\'e'), 'mi\'e')
+        self.assertEqual(remove_initial_apostrophe('\'til'), 'til')
+
+    def testCapitalizeConsonantYs(self):
+        self.assertEqual(capitalize_consonant_ys(''), '')
+        self.assertEqual(capitalize_consonant_ys('mike'), 'mike')
+        self.assertEqual(capitalize_consonant_ys('youth'), 'Youth')
+        self.assertEqual(capitalize_consonant_ys('boy'), 'boY')
+        self.assertEqual(capitalize_consonant_ys('boyish'), 'boYish')
+        self.assertEqual(capitalize_consonant_ys('fly'), 'fly')
+        self.assertEqual(capitalize_consonant_ys('flying'), 'flying')
+        self.assertEqual(capitalize_consonant_ys('syzygy'), 'syzygy')
+        self.assertEqual(capitalize_consonant_ys('sayyid'), 'saYyid')
+
+    def testStep0(self):
+        self.assertEqual(step_0(''), '')
+        self.assertEqual(step_0('mike'), 'mike')
+        self.assertEqual(step_0('dog\'s'), 'dog')
+        self.assertEqual(step_0('dog\'s\''), 'dog')
+        self.assertEqual(step_0('dog\''), 'dog')
+
+    def testStep1a(self):
+        self.assertEqual(step_1a(''), '')
+        self.assertEqual(step_1a('caresses'), 'caress')
+        self.assertEqual(step_1a('sses'), 'ss')
+        self.assertEqual(step_1a('ssesmike'), 'ssesmike')
+        self.assertEqual(step_1a('tied'), 'tie')
+        self.assertEqual(step_1a('cries'), 'cri')
+        self.assertEqual(step_1a('ties'), 'tie')
+        self.assertEqual(step_1a('hurried'), 'hurri')
+        self.assertEqual(step_1a('gas'), 'gas')
+        self.assertEqual(step_1a('this'), 'this')
+        self.assertEqual(step_1a('gaps'), 'gap')
+        self.assertEqual(step_1a('kiwis'), 'kiwi')
+        self.assertEqual(step_1a('bus'), 'bus')
+        self.assertEqual(step_1a('mikeus'), 'mikeus')
+        self.assertEqual(step_1a('mikess'), 'mikess')
+        self.assertEqual(step_1a('truss'), 'truss')
+
+    def testStep1b(self):
+        self.assertEqual(step_1b('', 0), '')
+        self.assertEqual(step_1b('ed', 0), 'ed')
+        self.assertEqual(step_1b('eed', 1), 'eed')
+        self.assertEqual(step_1b('ing', 0), 'ing')
+        self.assertEqual(step_1b('heed', 2), 'heed')
+        self.assertEqual(step_1b('coheed', 2), 'cohee')
+        self.assertEqual(step_1b('coheed', 3), 'cohee')
+        self.assertEqual(step_1b('heedly', 3), 'heedly')
+        self.assertEqual(step_1b('heedly', 0), 'hee')
+        self.assertEqual(step_1b('shred', 0), 'shred')
+        self.assertEqual(step_1b('luxuriated', 0), 'luxuriate')
+        self.assertEqual(step_1b('luxuriatedly', 0), 'luxuriate')
+        self.assertEqual(step_1b('luxuriating', 0), 'luxuriate')
+        self.assertEqual(step_1b('luxuriatingly', 0), 'luxuriate')
+        self.assertEqual(step_1b('disabled', 0), 'disable')
+        self.assertEqual(step_1b('disablingly', 0), 'disable')
+        self.assertEqual(step_1b('cauterizedly', 0), 'cauterize')
+        self.assertEqual(step_1b('cauterizing', 0), 'cauterize')
+        self.assertEqual(step_1b('hopped', 0), 'hop')
+        self.assertEqual(step_1b('clubbing', 0), 'club')
+        self.assertEqual(step_1b('troddedly', 0), 'trod')
+        self.assertEqual(step_1b('puffingly', 0), 'puf')
+        self.assertEqual(step_1b('hagged', 0), 'hag')
+        self.assertEqual(step_1b('spamming', 0), 'spam')
+        self.assertEqual(step_1b('shunnedly', 0), 'shun')
+        self.assertEqual(step_1b('torred', 0), 'tor')
+        self.assertEqual(step_1b('catted', 0), 'cat')
+        self.assertEqual(step_1b('exazzedly', 0), 'exazz')
+        self.assertEqual(step_1b('hoped', 0), 'hope')
+        self.assertEqual(step_1b('hopedly', 0), 'hope')
+        self.assertEqual(step_1b('hoping', 0), 'hope')
+        self.assertEqual(step_1b('hopingly', 0), 'hope')
+        self.assertEqual(step_1b('coped', 0), 'cope')
+
+    def testStep1c(self):
+        self.assertEqual(step_1c(''), '')
+        self.assertEqual(step_1c('cry'), 'cri')
+        self.assertEqual(step_1c('by'), 'by')
+        self.assertEqual(step_1c('say'), 'say')
+        self.assertEqual(step_1c('crY'), 'cri')
+        self.assertEqual(step_1c('bY'), 'bY')
+        self.assertEqual(step_1c('saY'), 'saY')
+
+    def testStep2(self):
+        self.assertEqual(step_2('', 0), '')
+        self.assertEqual(step_2('mike', 0), 'mike')
+        self.assertEqual(step_2('emotional', 2), 'emotion')
+        self.assertEqual(step_2('emotional', 4), 'emotional')
+        self.assertEqual(step_2('fenci', 1), 'fence')
+        self.assertEqual(step_2('fenci', 2), 'fenci')
+        self.assertEqual(step_2('necromanci', 3), 'necromance')
+        self.assertEqual(step_2('necromanci', 7), 'necromanci')
+        self.assertEqual(step_2('disabli', 3), 'disable')
+        self.assertEqual(step_2('disabli', 4), 'disabli')
+        self.assertEqual(step_2('evidentli', 2), 'evident')
+        self.assertEqual(step_2('evidentli', 5), 'evidentli')
+        self.assertEqual(step_2('kaizer', 2), 'kaize')
+        self.assertEqual(step_2('kaizer', 3), 'kaizer')
+        self.assertEqual(step_2('kaization', 2), 'kaize')
+        self.assertEqual(step_2('kaization', 8), 'kaization')
+        self.assertEqual(step_2('operational', 2), 'operate')
+        self.assertEqual(step_2('operational', 5), 'operational')
+        self.assertEqual(step_2('operation', 2), 'operate')
+        self.assertEqual(step_2('operation', 5), 'operation')
+        self.assertEqual(step_2('operator', 2), 'operate')
+        self.assertEqual(step_2('operator', 5), 'operator')
+        self.assertEqual(step_2('rationalism', 3), 'rational')
+        self.assertEqual(step_2('rationalism', 7), 'rationalism')
+        self.assertEqual(step_2('rationaliti', 3), 'rational')
+        self.assertEqual(step_2('rationaliti', 7), 'rationaliti')
+        self.assertEqual(step_2('rationalli', 3), 'rational')
+        self.assertEqual(step_2('rationalli', 7), 'rationalli')
+        self.assertEqual(step_2('gratefulness', 4), 'grateful')
+        self.assertEqual(step_2('gratefulness', 6), 'gratefulness')
+        self.assertEqual(step_2('obviousli', 2), 'obvious')
+        self.assertEqual(step_2('obviousli', 5), 'obviousli')
+        self.assertEqual(step_2('obviousness', 2), 'obvious')
+        self.assertEqual(step_2('obviousness', 5), 'obviousness')
+        self.assertEqual(step_2('responsiveness', 7), 'responsive')
+        self.assertEqual(step_2('responsiveness', 8), 'responsiveness')
+        self.assertEqual(step_2('responsiviti', 3), 'responsive')
+        self.assertEqual(step_2('responsiviti', 10), 'responsiviti')
+        self.assertEqual(step_2('abiliti', 1), 'able')
+        self.assertEqual(step_2('abiliti', 2), 'abiliti')
+        self.assertEqual(step_2('cebli', 2), 'ceble')
+        self.assertEqual(step_2('cebli', 3), 'cebli')
+        self.assertEqual(step_2('apogi', 2), 'apogi')
+        self.assertEqual(step_2('illogi', 2), 'illog')
+        self.assertEqual(step_2('illogi', 4), 'illogi')
+        self.assertEqual(step_2('gracefulli', 4), 'graceful')
+        self.assertEqual(step_2('gracefulli', 6), 'gracefulli')
+        self.assertEqual(step_2('classlessli', 4), 'classless')
+        self.assertEqual(step_2('classlessli', 6), 'classlessli')
+        self.assertEqual(step_2('cali', 0), 'cali')
+        self.assertEqual(step_2('acli', 0), 'ac')
+        self.assertEqual(step_2('acli', 3), 'acli')
+        self.assertEqual(step_2('adli', 0), 'ad')
+        self.assertEqual(step_2('beli', 0), 'be')
+        self.assertEqual(step_2('agli', 2), 'ag')
+        self.assertEqual(step_2('agli', 3), 'agli')
+        self.assertEqual(step_2('thli', 0), 'th')
+        self.assertEqual(step_2('likli', 0), 'lik')
+        self.assertEqual(step_2('homili', 0), 'homili')
+        self.assertEqual(step_2('tamli', 2), 'tam')
+        self.assertEqual(step_2('openli', 0), 'open')
+        self.assertEqual(step_2('earli', 3), 'ear')
+        self.assertEqual(step_2('earli', 4), 'earli')
+        self.assertEqual(step_2('tartli', 2), 'tart')
+
+    def testStep3(self):
+        self.assertEqual(step_3('', 0, 0), '')
+        self.assertEqual(step_3('mike', 0, 0), 'mike')
+        self.assertEqual(step_3('relational', 3, 0), 'relate')
+        self.assertEqual(step_3('relational', 4, 9), 'relational')
+        self.assertEqual(step_3('emotional', 2, 9), 'emotion')
+        self.assertEqual(step_3('emotional', 4, 0), 'emotional')
+        self.assertEqual(step_3('rationalize', 3, 0), 'rational')
+        self.assertEqual(step_3('rationalize', 7, 9), 'rationalize')
+        self.assertEqual(step_3('intricate', 2, 9), 'intric')
+        self.assertEqual(step_3('intricate', 7, 0), 'intricate')
+        self.assertEqual(step_3('intriciti', 2, 0), 'intric')
+        self.assertEqual(step_3('intriciti', 5, 9), 'intriciti')
+        self.assertEqual(step_3('intrical', 4, 9), 'intric')
+        self.assertEqual(step_3('intrical', 5, 0), 'intrical')
+        self.assertEqual(step_3('youthful', 4, 0), 'youth')
+        self.assertEqual(step_3('youthful', 6, 0), 'youthful')
+        self.assertEqual(step_3('happiness', 3, 0), 'happi')
+        self.assertEqual(step_3('happiness', 6, 0), 'happiness')
+        self.assertEqual(step_3('decorative', 3, 5), 'decor')
+        self.assertEqual(step_3('decorative', 3, 6), 'decorative')
+        self.assertEqual(step_3('decorative', 6, 5), 'decorative')
+
+    def testStep4(self):
+        self.assertEqual(step_4('', 0), '')
+        self.assertEqual(step_4('mike', 0), 'mike')
+        self.assertEqual(step_4('penal', 3), 'pen')
+        self.assertEqual(step_4('penal', 4), 'penal')
+        self.assertEqual(step_4('pance', 1), 'p')
+        self.assertEqual(step_4('pance', 2), 'pance')
+        self.assertEqual(step_4('dence', 0), 'd')
+        self.assertEqual(step_4('dence', 4), 'dence')
+        self.assertEqual(step_4('header', 3), 'head')
+        self.assertEqual(step_4('header', 5), 'header')
+        self.assertEqual(step_4('graphic', 5), 'graph')
+        self.assertEqual(step_4('graphic', 6), 'graphic')
+        self.assertEqual(step_4('table', 0), 't')
+        self.assertEqual(step_4('table', 2), 'table')
+        self.assertEqual(step_4('quible', 1), 'qu')
+        self.assertEqual(step_4('quible', 3), 'quible')
+        self.assertEqual(step_4('recant', 1), 'rec')
+        self.assertEqual(step_4('recant', 5), 'recant')
+        self.assertEqual(step_4('lement', 0), 'l')
+        self.assertEqual(step_4('lement', 2), 'lement')
+        self.assertEqual(step_4('ment', 0), '')
+        self.assertEqual(step_4('ment', 1), 'ment')
+        self.assertEqual(step_4('ent', 0), '')
+        self.assertEqual(step_4('ent', 2), 'ent')
+        self.assertEqual(step_4('schism', 3), 'sch')
+        self.assertEqual(step_4('schism', 4), 'schism')
+        self.assertEqual(step_4('kate', 1), 'k')
+        self.assertEqual(step_4('kate', 2), 'kate')
+        self.assertEqual(step_4('citi', 0), 'c')
+        self.assertEqual(step_4('citi', 3), 'citi')
+        self.assertEqual(step_4('lous', 1), 'l')
+        self.assertEqual(step_4('lous', 2), 'lous')
+        self.assertEqual(step_4('hive', 0), 'h')
+        self.assertEqual(step_4('hive', 3), 'hive')
+        self.assertEqual(step_4('ize', 0), '')
+        self.assertEqual(step_4('ize', 1), 'ize')
+
+    def testStep5(self):
+        self.assertEqual(step_5('mik', 0, 0), 'mik')
+        self.assertEqual(step_5('mike', 5, 3), 'mik')
+        self.assertEqual(step_5('mike', 5, 4), 'mike')
+        self.assertEqual(step_5('mike', 3, 4), 'mike')
+        self.assertEqual(step_5('mixe', 3, 4), 'mix')
+        self.assertEqual(step_5('recall', 7, 5), 'recal')
+        self.assertEqual(step_5('recal', 0, 4), 'recal')
+        self.assertEqual(step_5('recall', 0, 6), 'recall')
+
+    def testNormalizeYs(self):
+        self.assertEqual(normalize_ys(''), '')
+        self.assertEqual(normalize_ys('mike'), 'mike')
+        self.assertEqual(normalize_ys('syzygy'), 'syzygy')
+        self.assertEqual(normalize_ys('sYzygY'), 'syzygy')
+        self.assertEqual(normalize_ys('MiKe'), 'MiKe')
+        self.assertEqual(normalize_ys('MDirYol'), 'MDiryol')
+
+    def testStem(self):
+        stemmer = Stemmer('english')
+        self.assertEqual(stemmer.stemWord(''), '')
+
+        # some normal case tests
+        self.assertEqual(stemmer.stemWord('mike'), 'mike')
+        self.assertEqual(stemmer.stemWord('consign'), 'consign')
+        self.assertEqual(stemmer.stemWord('consigned'), 'consign')
+        self.assertEqual(stemmer.stemWord('consigning'), 'consign')
+        self.assertEqual(stemmer.stemWord('consignment'), 'consign')
+        self.assertEqual(stemmer.stemWord('consist'), 'consist')
+        self.assertEqual(stemmer.stemWord('consisted'), 'consist')
+        self.assertEqual(stemmer.stemWord('consistency'), 'consist')
+        self.assertEqual(stemmer.stemWord('consistent'), 'consist')
+        self.assertEqual(stemmer.stemWord('consistently'), 'consist')
+        self.assertEqual(stemmer.stemWord('consisting'), 'consist')
+        self.assertEqual(stemmer.stemWord('consists'), 'consist')
+
+        # exceptionalWord form tests
+        self.assertEqual(stemmer.stemWord('skis'), 'ski')
+        self.assertEqual(stemmer.stemWord('skies'), 'sky')
+        self.assertEqual(stemmer.stemWord('dying'), 'die')
+        self.assertEqual(stemmer.stemWord('lying'), 'lie')
+        self.assertEqual(stemmer.stemWord('tying'), 'tie')
+        self.assertEqual(stemmer.stemWord('idly'), 'idl')
+        self.assertEqual(stemmer.stemWord('gently'), 'gentl')
+        self.assertEqual(stemmer.stemWord('ugly'), 'ugli')
+        self.assertEqual(stemmer.stemWord('early'), 'earli')
+        self.assertEqual(stemmer.stemWord('only'), 'onli')
+        self.assertEqual(stemmer.stemWord('singly'), 'singl')
+        self.assertEqual(stemmer.stemWord('sky'), 'sky')
+        self.assertEqual(stemmer.stemWord('news'), 'news')
+        self.assertEqual(stemmer.stemWord('howe'), 'howe')
+        self.assertEqual(stemmer.stemWord('atlas'), 'atlas')
+        self.assertEqual(stemmer.stemWord('cosmos'), 'cosmos')
+        self.assertEqual(stemmer.stemWord('bias'), 'bias')
+        self.assertEqual(stemmer.stemWord('andes'), 'andes')
+        self.assertEqual(stemmer.stemWord('innings'), 'inning')
+        self.assertEqual(stemmer.stemWord('outing'), 'outing')
+        self.assertEqual(stemmer.stemWord('canninger'), 'canning')
+        self.assertEqual(stemmer.stemWord('herrings'), 'herring')
+        self.assertEqual(stemmer.stemWord('earring'), 'earring')
+        self.assertEqual(stemmer.stemWord('proceeder'), 'proceed')
+        self.assertEqual(stemmer.stemWord('exceeding'), 'exceed')
+        self.assertEqual(stemmer.stemWord('succeeds'), 'succeed')
+
+        # exhaustive test: stem every word in the sample vocabulary and
+        # compare it with the expected stem from the reference file
+        infile = open('./en-voc.txt', 'r')
+        outfile = open('./en-stemmedvoc.txt', 'r')
+        while True:
+            word = infile.readline()
+            output = outfile.readline()
+            if word == '':
+                break
+            # strip the trailing newline before comparing
+            word = word[:-1]
+            output = output[:-1]
+            self.assertEqual(stemmer.stemWord(word), output)
+        infile.close()
+        outfile.close()
+
+if __name__ == '__main__':
+    unittest.main()

search/static/search/autocomplete_activator.js

+/*
+ * This file enables the auto-complete feature. For an input field to be
+ * autocompleted, it has to carry a src attribute pointing to the
+ * live_search view, which returns the suggestions as JSON. To enable
+ * redirection to the page of the selected item, that path must end
+ * with '/redirect' (without a trailing slash).
+ */
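+
+/* Illustrative markup (the field name, URL and classes below are assumptions,
+ * not taken from this commit; the classes switch on the corresponding option
+ * flags handled further down):
+ *
+ *   <input type="text" name="query"
+ *          src="/some/live_search/redirect"
+ *          class="select-first auto-fill" />
+ */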
+
+$(document).ready(function() {
+  $('input[type="text"][src]').livequery(function() {
+    // option flags that can be switched on by adding the corresponding
+    // dashed CSS class to the input (e.g. "select-first", "must-match")
+    var flags = {multipleValues: false, selectFirst: false, autoFill: false,
+                 mustMatch: false, matchContains: false};
+    for (var opt in flags)
+      if ($(this).hasClass(opt.replace(/[A-Z]/, function(c) {
+            return '-' + c.toLowerCase();
+          })))
+        flags[opt] = true;
+    
+    // append Django's (patched via aep) help text
+    var help_text = $($.Autocompleter.defaults.help_text);
+    if ($('form', $(this).parents()).length) {
+      if ($(this).next().length && $(this).next().hasClass('help-text'))
+        help_text.append($(this).next().clone(true));
+    }
+    //TODO: extend the options with the flags above
+    $(this).autocomplete($(this).attr('src'), {
+      cacheLength : 10,
+      max: 10,
+      width: 350,
+      scrollHeight: 250,
+      multiple: flags.multipleValues,
+      selectFirst: flags.selectFirst,
+      autoFill: flags.autoFill,
+      mustMatch: flags.mustMatch,
+      matchContains: flags.matchContains,
+      parse: function(data) { return data; },
+      dataType: 'json',
+      formatItem: function(data, i, n, value) { return value; },
+      help_text: help_text
+    });
+    $(this).result(function(event, data, formatted) {
+      if (data["link"]) {
+        document.location.href=data.link;
+      }
+    });
+    $(this).addClass('autocompleting');
+  });
+});

search/static/search/dropdownarrow-over.png

Added
New image

search/static/search/dropdownarrow.png

Added
New image

search/static/search/jquery.autocomplete.css

+/*!
+ * Autocomplete - jQuery plugin 1.0.2
+ *
+ * Copyright (c) 2007 Dylan Verheul, Dan G. Switzer, Anjesh Tuladhar, Jörn Zaefferer
+ *
+ * Dual licensed under the MIT and GPL licenses:
+ *   http://www.opensource.org/licenses/mit-license.php
+ *   http://www.gnu.org/licenses/gpl.html
+ *
+ */
+
+.ac_results {
+	padding: 0px;
+	border: 1px solid black;
+	background-color: white;
+	overflow: hidden;
+	z-index: 99999;
+}
+
+.ac_results ul {
+	width: 100%;
+	list-style-position: outside;
+	list-style: none;
+	padding: 0;
+	margin: 0;
+}
+
+.ac_results li {
+	margin: 0px;
+	padding: 2px 5px;
+	cursor: default;
+	display: block;
+	/*
+	if width is set to 100%, a horizontal scrollbar will appear
+	when scroll mode is used
+	*/
+	/*width: 100%;*/
+	font: menu;
+	font-size: 12px;
+	/*
+	important: if line-height is not set, or is set in relative
+	units, scrolling will break in Firefox
+	*/
+	line-height: 16px;
+	overflow: hidden;
+}
+
+.ac_loading {
+	background: white right center no-repeat;
+}
+
+.ac_odd {
+	background-color: #eee;
+}
+
+.ac_over {
+	background-color: #0A246A;
+	color: white;
+}
+
+.ac-special-result {
+  padding: 2px 5px;
+}
+
+.dropdown {
+  border: 1px solid lightgray;
+  border-top-color: gray; 
+}

search/static/search/jquery.autocomplete.js

+/*
+License: http://gae-full-text-search.appspot.com/license
+Copyright 2009 Waldemar Kornewald & Thomas Wanschik GbR
+Use/redistribution without explicit permission (e.g., a granted license) is prohibited.
+
+This autocomplete jQuery plugin is a customized version based on work by Dylan Verheul, Dan G. Switzer, Anjesh Tuladhar, Jörn Zaefferer
+*/
+
+;(function($) {
+
+$.fn.extend({
+  autocomplete: function(urlOrData, options) {
+    var isUrl = typeof urlOrData == "string";
+    options = $.extend({}, $.Autocompleter.defaults, {
+      url: isUrl ? urlOrData : null,
+      data: isUrl ? null : urlOrData,
+      delay: isUrl ? $.Autocompleter.defaults.delay : 10,
+      max: options && !options.scroll ? 10 : 150
+    }, options);
+
+    // options.lengthPerCacheKey should be the max number of results returned by
+    // the server
+    if (!options.lengthPerCacheKey)
+      options.lengthPerCacheKey = options.max;
+    else {
+      options.lengthPerCacheKey = (options.lengthPerCacheKey < options.max) ?
+          options.max : options.lengthPerCacheKey;
+    }
+    // if highlight is set to false, replace it with a do-nothing function
+    options.highlight = options.highlight || function(value) { return value; };
+    
+    // if the formatMatch option is not specified, then use formatItem for backwards compatibility
+    options.formatMatch = options.formatMatch || options.formatItem;
+    
+    return this.each(function() {
+      new $.Autocompleter(this, options);
+    });
+  },
+  result: function(handler) {
+    return this.bind("result", handler);
+  },
+  search: function(handler) {
+    return this.trigger("search", [handler]);
+  },
+  flushCache: function() {
+    return this.trigger("flushCache");
+  },
+  setOptions: function(options){
+    return this.trigger("setOptions", [options]);
+  },
+  unautocomplete: function() {
+    return this.trigger("unautocomplete");
+  }
+});
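+
+/* The methods above form the plugin's public jQuery API. An illustrative
+   usage sketch (element id and URL are assumptions, not from this commit):
+     $('#query').autocomplete('/some/live_search/', {max: 10})
+                .result(function(event, data, formatted) { ... });
+     $('#query').flushCache();      // drop cached suggestions
+     $('#query').unautocomplete();  // detach the autocompleter
+*/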
+
+$.Autocompleter = function(input, options) {
+
+  var KEY = {
+    UP: 38,
+    DOWN: 40,
+    DEL: 46,
+    TAB: 9,
+    RETURN: 13,
+    ESC: 27,
+    COMMA: 188,
+    PAGEUP: 33,
+    PAGEDOWN: 34,
+    BACKSPACE: 8,
+    LEFT_ARROW: 37,
+    RIGHT_ARROW: 39,
+    HOME: 36,
+    END: 35,
+    SHIFT: 16
+  };
+
+  // Create $ object for input element
+  var $input = $(input).attr("autocomplete", "off").addClass(options.inputClass);
+
+  var timeout;
+  var previousValue = '';
+  var cache = $.Autocompleter.Cache(options);
+  var hasFocus = 0;
+  var lastKeyPressCode;
+  var config = {
+    mouseDownOnSelect: false,
+    from_selection: false
+  };
+  var select = $.Autocompleter.Select(options, input, selectCurrent, config);
+  
+  // function to display facebook-style results (help_text, no results found, ...)
+  var showSpecialResult = function(type) {
+    select.init();
+    select.show(type);
+  };
+  
+  // checks if mouse is over the dropdown arrow
+  var on_dropdownarrow = function(event) {
+    return ($input.offset().left + $input.innerWidth() - parseInt($input.css('padding-right')) < event.pageX
+        && $input.offset().left + $input.innerWidth() > event.pageX &&
+        $input.offset().top + $input.innerHeight() > event.pageY &&
+        $input.offset().top < event.pageY);
+  };
+  
+  // function used to display the complete list, used for the dropdown option
+  var open_complete_list = function () {
+    var data = cache.load('');
+    if (data && data.length) {
+      select.display(data, '');
+      // do not show the result list, if input has been removed
+      if($('body', $input.parents()).get(0) && hasFocus)
+        select.show();
+    }
+
+    // mark selected value to allow immediate typing of new words
+    setTimeout(function() {
+      $.Autocompleter.Selection(input, 0, input.value.length);
+    }, 100);
+  };
+  /* If options.dropdown is defined, add
+     an image button next to the autocomplete field
+     that will open the autocomplete list like
+     a dropdown input
+  */
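+  /* An illustrative call enabling this behaviour (the selector and URL are
+     assumptions, not taken from this commit):
+       $('#country').autocomplete('/some/live_search/', {dropdown: true});
+     Passing an array of two image paths instead of true replaces the
+     default arrow images set below. */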
+  if (options.dropdown) {
+    // the images should have a size of (20x20)px
+    options.dropdown = (options.dropdown === true) ? ['dropdownarrow.png',
+      'dropdownarrow-over.png'] : options.dropdown;
+    // always show the drop down list
+    options.minChars = 0;
+    // for a select-list the user can only select exactly one item!
+    options.multiple = false;
+    // for a select-list the user has to type in the exact word!
+    options.mustMatch = true;
+    // never use matchContains together with autoFill!
+    if (options.matchContains)
+      options.autoFill = false;
+    // TODO: Use data.length or something like this
+    // show all results
+    options.max = $.Autocompleter.defaults.max;
+    // case insensitive!
+    options.matchCase = false;
+    options.selectFirst = true;
+    
+    // preload both arrow images (normal and hover state)
+    new Image().src = options.dropdown[0];
+    new Image().src = options.dropdown[1];
+    
+    $input.addClass('dropdown').css({
+      'background': 'white url(' + options.dropdown[0] + ') center right no-repeat',
+      'padding-right': '20px'
+    });
+    
+    $input.mousedown(function(event) {
+      if (on_dropdownarrow(event)) {
+        if (select.visible()) {
+          // the list is visible, so this click should hide it; but the value
+          // still has to be validated, so use hideResultsNow instead of select.hide!
+          hideResultsNow('whole-word');
+          // TODO: prevent mousedown on body?
+          // return false;
+        }
+        else if (hasFocus) {
+          // the list is hidden, so this click should show it
+          // (if hasFocus were false, the focus event fired after this
+          // mousedown would open the list anyway).
+          open_complete_list();
+          // enable selection of results
+          return false;
+        }
+      }
+      else if (hasFocus && !select.visible()) {
+        open_complete_list();
+        // enable selection of results
+        return false;
+      }
+    }).mousemove(function(event) {
+      if(on_dropdownarrow(event))
+        $input.css({
+          'cursor': 'default',
+          'background': 'white url(' + options.dropdown[1] + ') center right no-repeat'
+        });
+      else {
+        if (!select.visible())
+          $input.css('background',
+              'white url(' + options.dropdown[0] + ') center right no-repeat');
+        $input.css('cursor', 'text');
+      }
+    }).mouseleave(function(event) {
+      if (!select.visible())
+        $input.css({
+          'background': 'white url(' + options.dropdown[0] + ') center right no-repeat'
+      });
+    }).mouseup(function () {
+      config.mouseDownOnSelect = false;
+      // return false;
+    });
+  }
+  
+  // set focus again if mousedown was fired on a result item but no mouseup
+//  $(document).mouseup(function() {
+//    if (config.mouseDownOnSelect) {
+//      config.mouseDownOnSelect = false;
+//      input.focus();
+//    }
+//  });
+  
+  // mouseup on scrollbar does not fire in all browsers so install this event
+  // handler to hide the select after a mouseup on the scrollbar
+  $(document).mousedown(function() {
+    config.mouseDownOnSelect = false;
+    hideResultsNow('whole-word');
+  });
+    
+  var blockSubmit;
+  
+  // prevent form submit in opera when selecting with return key
+  $.browser.opera && $(input.form).bind("submit.autocomplete", function() {
+    if (blockSubmit) {
+      blockSubmit = false;
+      return false;
+    }
+  });
+  
+  // helper function used for KEY.UP, ...
+  var navigate = function(event, action) {
+    // action should be a string
+    
+    event.preventDefault();
+    if (select.visible())
+      select[action]();
+    // open result list if not visible
+    else if (options.dropdown)
+      open_complete_list();
+    else
+      onChange(0, true);
+
+    // Set hasFocus to 1 if a key was pressed, because then we are
+    // definitely in the input field
+    if (hasFocus <= 0)
+      hasFocus = 1;
+  };
+  // only Opera doesn't trigger keydown repeatedly while a key is held down; other browsers don't work with keypress at all
+  $input.bind(($.browser.opera ? "keypress" : "keydown") + ".autocomplete", function(event) {
+    // track last key pressed
+    lastKeyPressCode = event.keyCode;
+    switch(event.keyCode) {
+
+      case KEY.UP:
+        navigate(event, 'prev');
+        break;
+        
+      case KEY.DOWN:
+        navigate(event, 'next');
+        break;
+        
+      case KEY.PAGEUP:
+        navigate(event, 'pageUp');
+        break;
+        
+      case KEY.PAGEDOWN:
+        navigate(event, 'pageDown');
+        break;
+        
+      // do not trigger a search (onChange) when navigating
+      // through the input box
+      case KEY.LEFT_ARROW: