twidi avatar twidi committed a2bfe45

add a `setup.py` (and move code to a `sesql` directory)

Comments (0)

Files changed (70)

 *.elc
 *.pyc
 *~
-
+.*.sw?
+build/
+*.egg*

__init__.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-#
-# !!! WARNING !!! : imports of  signals and monkey patchs are moved to
-# models.py to not create conflicts with Django's app loading...
-#

daemon/__init__.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-This the asynchronous reindexation daemon for dependencies
-"""

daemon/cmdline.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""Command line parser, should be converted to argparse ?"""
-
-import sys, getopt
-
-class CmdLine(object):
-    "The command line parser"
-    def __init__(self, argv):
-        self.argv = argv
-        self.all = []
-        self.longs = []
-        self.shorts = ""
-        self.convert = {}
-        self.values = {}
-        self.add_opt("help", "h",
-                     help_msg = "display this help")
-        self.add_opt("version", "v",
-                     help_msg = "display version number and exits")
-
-    def __getitem__(self, item):
-        return self.values[item]
-
-    def __setitem__(self, item, value):
-        self.values[item] = value
-
-    def has_key(self, key):
-        return self.values.has_key(key)
-
-    def items(self):
-        return self.values.items()
-
-    def add_opt(self, long, short = "", value = None, help_msg = ""):
-        "Adds an option to the list of known ones"
-        self.all.append((long, short, value, help_msg))
-        self.values[long] = value
-        self.convert["--" + long] = long
-        if(short):
-            self.convert["-" + short] = long
-            self.shorts = self.shorts + short
-        if(not(value is None)):
-            self.longs.append(long + "=")
-            if(short):
-                self.shorts = self.shorts + ":"
-        else:
-            self.longs.append(long)
-            
-    def parse_opt(self):
-        "Parse the command line"
-        try:
-            optlist, args = getopt.getopt(self.argv[1:], self.shorts, self.longs)
-        except getopt.GetoptError, s:
-            print self.argv[0] + ":", s, ". Try --help."
-            sys.exit(2)
-
-        self.args = args
-        for opt, val in optlist:
-            # Extra key from options
-            while(self.convert.has_key(opt)):
-                opt = self.convert[opt]
-            if(val):
-                self.values[opt] = val
-            else:
-                self.values[opt] = True
-        
-    def show_help(self, extra = ""):
-        print "Syntax: %s %s [<options>]" % (self.argv[0], extra)
-        print "Options:"
-        longest = max([ len(l) for l in self.convert.keys() ])
-        for long, short, value, help_msg in self.all:
-            default = value and "(default: %s)" % value or ""
-            name = "--" + long
-            name += " " * (longest - len(name))
-            if short:
-                name += ", -" + short
-            else:
-                name += "    "
-            print "  %s: %s %s" % (name, help_msg, default)
-            
-            

daemon/sesql-update.py

-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-Update the scheduled updates
-"""
-
-import sys, time, traceback, os, logging
-
-from sesql.daemon.unixdaemon import UnixDaemon
-from sesql.daemon.cmdline import CmdLine
-
-from sesql_config import *
-
-from sesql import index, results
-
-from django.db import connection, transaction
-
-
-def version():
-    print "sesql update daemon, v 0.9"
-
-class UpdateDaemon(UnixDaemon):
-    """
-    The daemon class
-    """
-    def __init__(self, chunk, delay, pidfile):
-        UnixDaemon.__init__(self, pidfile)
-        self.chunk = int(chunk)
-        self.delay = float(delay)
-        self.log = logging.getLogger('sesql-update')
-
-    def run(self):
-        """
-        Main loop
-        """
-        while True:
-            try:
-                self.process_chunk()
-            except:
-                type, value, tb = sys.exc_info()        
-                error = traceback.format_exception_only(type, value)[0]
-                print >> sys.stderr, error
-            self.log.debug("Sleeping for %.2f second(s)" % self.delay)
-            time.sleep(self.delay)
-
-    @transaction.commit_manually
-    def process_chunk(self):
-        """
-        Process a chunk
-        """
-        cursor = connection.cursor()    
-        cursor.execute("""SELECT classname, objid
-                          FROM sesql_reindex_schedule
-                          ORDER BY scheduled_at ASC LIMIT %d""" % self.chunk)
-        rows = cursor.fetchall()
-        if not rows:
-            transaction.rollback()
-            return
-        self.log.info("Found %d row(s) to reindex" % len(rows))
-
-        done = set()
-        
-        for row in rows:
-            row = tuple(row)
-            if not row in done:
-                self.log.info("Reindexing %s:%d" % row)
-                done.add(row)
-                obj = results.SeSQLResultSet.load(row)
-                index.index(obj)
-                cursor.execute("""DELETE FROM sesql_reindex_schedule
-                                  WHERE classname=%s AND objid=%s""", row)
-        transaction.commit()
-
-if __name__ == "__main__":
-    cmd = CmdLine(sys.argv)
-    cmd.add_opt('debug', 'd', None, "Run in debug mode (don't daemonize)")
-    cmd.add_opt('chunk', 'c', str(DAEMON_DEFAULT_CHUNK), "Chunk size")
-    cmd.add_opt('wait', 'w', str(DAEMON_DEFAULT_DELAY),
-                "Wait between each chunk")
-    cmd.add_opt('pidfile', 'p', str(DAEMON_DEFAULT_PID), "Pidfile to use")
-    cmd.parse_opt()
-      
-    if cmd["help"]:
-        cmd.show_help()
-        sys.exit(0)
-
-    if cmd["version"]:
-        version()
-        sys.exit(0)
-
-    daemon = UpdateDaemon(cmd["chunk"], cmd["wait"], cmd["pidfile"])
-
-    if cmd["debug"]:
-        ch = logging.StreamHandler()
-        logger = logging.getLogger('sesql-update')
-        ch.setLevel(logging.DEBUG)
-        logger.addHandler(ch)
-        daemon.run()
-    else:
-        daemon.start_deamon()
-

daemon/sesqlctl

-#!/bin/sh -e
-#
-# Startup script for SeSQL reindex daemon
-# You should copy it to /etc/init.d and edit it there.
-# It was only tested on Debian, you may need to adjust it.
-#
-
-# Configure that
-export DJANGO_SETTINGS_MODULE="settings"
-APPS_ROOT="/path/to/django/apps"
-PROJECT_ROOT="/path/to/django/project"
-USER="django"
-PIDFILE="/path/to/run/sesql-update.pid"
-DAEMON="/usr/bin/python"
-OPTIONS="-p $PIDFILE"
-
-
-export PYTHONPATH="$PROJECT_ROOT:$APPS_ROOT"
-OPTIONS="$APPS_ROOT/sesql/daemon/sesql-update.py $OPTIONS"
-export PATH=/usr/sbin:/sbin:$PATH
-
-test -f $DAEMON || exit 0
-
-do_start()
-{
-    echo -n "Starting SeSQL reindex daemon"
-    start-stop-daemon --start --quiet --pidfile $PIDFILE \
-        --exec $DAEMON -c $USER -o -- $OPTIONS
-    echo "."
-}
-
-do_stop()
-{
-    echo -n "Stopping SeSQL reindex daemon"
-    start-stop-daemon --stop --quiet --pidfile $PIDFILE \
-        -c $USER -o --exec $DAEMON
-    echo "."
-}
-
-case "$1" in
-  start) 
-        do_start
-        ;;
-  stop) 
-        do_stop
-        ;;
-  restart|force-reload)
-        do_stop
-        sleep 1
-        do_start
-        ;;
-  *)
-        echo "Usage: $0 {start|stop|restart|force-reload}"
-        exit 1
-        ;;
-esac
-
-exit 0
-

daemon/unixdaemon.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-import sys, os, time, signal, os.path
-import logging
-
-class LogWrapper(object):
-    """File-like object for log files"""
-    def __init__(self, callback):
-        self.callback = callback
-        self.buffer = ""
-
-    def write(self, msg):
-        if not msg:
-            return
-        if (msg[-1] == "\n"):
-            self.buffer += msg[:-1]
-            self.flush()
-        else:
-            self.buffer += msg
-
-    def flush(self):
-        self.callback(self.buffer)
-        self.buffer = ""
-
-    def seek(self):
-        pass
-
-def logwrap():
-    logger = logging.getLogger("daemon")
-    sys.stdout = LogWrapper(logging.info)
-    sys.stderr = LogWrapper(logging.error)
-    
-def daemonize(pidfile, ):
-    """Do forks and other nice tricks to ensure the code following
-    the call of daemonize will run as a Unix demaon"""
-
-    logwrap()
-
-    try:
-        previous_pid = int(file(pidfile).read())
-        os.kill(previous_pid, signal.SIGCONT)
-        raise RuntimeError("Daemon already running with pid %d" % previous_pid)
-    except OSError:
-        os.remove(pidfile)
-        pass
-    except IOError:
-        pass
-
-    try:
-        pid = os.fork()
-        if pid > 0:
-            count = 0
-            while count < 50:
-                count += 1
-                time.sleep(0.1)
-                try:
-                    return int(file(pidfile).read())
-                except:
-                    pass
-            raise RuntimeError("Daemon not started.")
-    except OSError, e:
-        logging.error("Fork #1 failed: %d (%s)" % (e.errno, e.strerror))
-        raise
-
-    # Decouple
-    os.chdir('/')
-    os.setsid()
-    os.umask(0)
-
-    # Do second fork
-    try:
-        pid = os.fork()
-    except OSError, e:
-        logging.error("Fork #2 failed: %d (%s)", e.errno, e.strerror)
-        sys.exit(1)
-        
-    if pid > 0:
-        # exit from second parent, print new PID before
-        logging.info("Daemon PID %d", pid)
-        try:
-            pidfile = file(pidfile, "w")
-            pidfile.write("%d\n" % pid)
-            pidfile.close()
-        except:
-            os.kill(pid, signal.SIGKILL)
-            sys.exit(1)
-        sys.exit(0)
-
-
-class UnixServiceManager(object):
-    """
-    Base class to implement a service manager, subclass it and implement the
-    run() method.
-    """
-
-    def __init__(self):
-        """
-        Initialise the manager.
-        """
-        self.childpid = -1
-
-    def stop(self):
-        """
-        Stop the child Unix process.
-        """
-        logging.info("Stopping the service...")
-        if self.childpid<1:
-            logging.warn("Service not running!")
-            return
-        try:
-            os.kill(self.childpid, signal.SIGTERM)
-        except OSError, e:
-            logging.error("Kill failed: %d (%s)" % (e.errno, e.strerror))
-        try:
-            os.waitpid(self.childpid, 0)
-        except OSError, e:
-            logging.error("Waitpid failed: %d (%s)" % (e.errno, e.strerror))
-        self.childpid = -1
-
-    def restart(self):
-        """
-        Restart the process.
-        """
-        logging.info("Restarting the service...")
-        self.stop()
-        self.start()
-
-    def start(self):
-        """
-        Start the process.
-        """
-        logging.info("Starting the service...")
-
-        
-        pid = os.fork()
-            
-        if (pid > 0):
-            self.childpid = pid
-            signal.signal(signal.SIGHUP, self.sighup)
-            signal.signal(signal.SIGTERM, self.sigterm)
-            while True:
-                signal.pause()
-        else:
-            self.run()
-            sys.exit(0)
-
-    def run(self):
-        """Run the service, override that"""
-        raise "Not implemented"""
-
-class UnixDaemon (UnixServiceManager):
-    """
-    Base class to implement a deamon, subclass it and implement the run()
-    method.
-
-    SIGHUP: restart
-    SIGTERM: stop and quit
-    """
-
-    def __init__(self, pidfile):
-        """
-        Initialize the demaon.
-        """
-        UnixServiceManager.__init__(self)      
-        self.pidfile = os.path.realpath(pidfile)
-
-    def start_deamon(self):
-        """
-        Start the daemon.
-        """
-        pid = daemonize(self.pidfile)
-        if pid is None:
-            self.start()
-
-        return pid
-
-    def stop_daemon(self):
-        """
-        Stop the daemon.
-        """
-        pid = int(file(self.pidfile).read())
-        os.kill(pid, signal.SIGTERM)
-
-    def sighup(self, signalid, stack):
-        """
-        Handle restarting the service.
-        """
-        logging.info("SIGHUP received, restarting service")
-        self.restart()
-
-    def sigterm(self, signalid, stack):
-        """
-        Sigterm received
-        """
-        logging.info("SIGTERM received, stopping service")
-        self.stop()
-        try:
-            os.remove(self.pidfile)
-        except:
-            pass
-        sys.exit(1)
-
-    def logwrap(self):
-        """
-        Wrap stdout and stderr as log files
-        """
-        logwrap()

datamodel.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-import sesql_config as config
-from sesql.typemap import typemap
-from django.db import connection, transaction
-
-from sesql.utils import table_exists
-
-def sql_function(func):
-    """
-    Decorator to execute or print SQL statements
-    """
-    def sql_function_inner(execute = False, verbosity = True,
-                           include_drop = False,
-                           **kwargs):
-        sql = func(**kwargs)
-        if not include_drop:
-            sql = [ row for row in sql if not row.startswith('DROP ') ]
-        if verbosity:
-            print
-            for row in sql:
-                print row + ";"
-            print
-        if execute:
-            cursor = connection.cursor()
-            for row in sql:
-                cursor.execute(row)
-    return sql_function_inner
-
-@sql_function
-def create_dictionnary():
-    """
-    Create the dictionnary configuration
-    """
-    return [
-        "DROP TEXT SEARCH CONFIGURATION IF EXISTS public.%s" % config.TS_CONFIG_NAME,
-        "CREATE TEXT SEARCH CONFIGURATION public.%s (COPY = pg_catalog.simple)" % config.TS_CONFIG_NAME,
-        "DROP TEXT SEARCH DICTIONARY IF EXISTS public.%s_dict" % config.TS_CONFIG_NAME,
-        """CREATE TEXT SEARCH DICTIONARY public.%s_dict (
-        TEMPLATE = pg_catalog.simple,
-        STOPWORDS = %s
-)""" % (config.TS_CONFIG_NAME, config.STOPWORDS_FILE),
-        """ALTER TEXT SEARCH CONFIGURATION %s
-        ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH %s_dict""" % (config.TS_CONFIG_NAME, config.TS_CONFIG_NAME)
-        ] + getattr(config, "ADDITIONAL_TS_CONFIG", [])
-
-@sql_function
-def create_master_table():
-    """
-    Create the master table, that is, the one from which the others
-    will inherit
-    """
-    schema = "\n  ".join([ field.schema() for field in config.FIELDS ])
-    
-    return [
-        "DROP TABLE IF EXISTS %s CASCADE" % config.MASTER_TABLE_NAME,
-        """CREATE TABLE %s (
-%s
-  PRIMARY KEY (classname, id)
-)""" % (config.MASTER_TABLE_NAME, schema)
-        ]
-
-@sql_function
-def create_table(table = None):
-    """
-    Create given table
-    """
-    if table is None:
-        return []
-    
-    condition = typemap.get_class_names_for(table)
-    condition = ' OR '.join([ "classname = '%s'" % cls for cls in condition ])
-    res = [ "CREATE TABLE %s (CHECK (%s), PRIMARY KEY (classname, id)) INHERITS (%s)" % (table, condition, config.MASTER_TABLE_NAME) ]
-
-    for field in config.FIELDS:
-        res.append(field.index(table))
-        
-    for cross in config.CROSS_INDEXES:
-        res.append("CREATE INDEX %s_%s_index ON %s (%s);" % (table, "_".join(cross), table, ",".join(cross)))
-
-    return res
-    
-@sql_function
-def create_schedule_table():
-    """
-    Create the table to insert the reindex schedule
-    """
-    return [
-        "DROP SEQUENCE IF EXISTS sesql_reindex_id_seq",
-        "CREATE SEQUENCE sesql_reindex_id_seq",
-
-        "DROP TABLE IF EXISTS sesql_reindex_schedule",
-        """CREATE TABLE sesql_reindex_schedule (
-        rowid integer NOT NULL,
-        classname character varying(250) NOT NULL,
-        objid integer NOT NULL,
-        scheduled_at timestamp NOT NULL DEFAULT NOW(),
-        PRIMARY KEY (rowid)
-        )""",
-        "CREATE INDEX sesql_reindex_schedule_date_index ON sesql_reindex_schedule (scheduled_at)",
-        "CREATE INDEX sesql_reindex_schedule_content_index ON sesql_reindex_schedule (classname, rowid)"
-    ]
-
-
-def sync_db(verbosity = 0, interactive = False, signal = None, **kwargs):
-    if hasattr(signal, "_sesql_syncdb_done"):
-        return
-    signal._sesql_syncdb_done = True
-
-    if not table_exists(config.MASTER_TABLE_NAME):
-        create_dictionnary(execute = True, verbosity = verbosity, include_drop = True)
-        create_master_table(execute = True, verbosity = verbosity, include_drop = True)
-    elif verbosity:
-        print "SeSQL : Table %s already existed, skipped." % config.MASTER_TABLE_NAME
-        
-    for table in typemap.all_tables():
-        if not table_exists(table):
-            create_table(table = table, execute = True, verbosity = verbosity)
-        elif verbosity:
-            print "SeSQL : Table %s already existed, skipped." % table
-
-    if not table_exists("sesql_reindex_schedule"):
-        create_schedule_table(execute = True, verbosity = verbosity, include_drop = True)
-    elif verbosity:
-        print "SeSQL : Table %s already existed, skipped." % 'sesql_reindex_schedule'
-    

fieldmap.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-Handle the field map
-"""
-
-from django.db import models
-import sesql_config as config
-
-class FieldMap(object):
-    """
-    Handle the classes <=> table mapping
-    """
-    def __init__(self):
-        """
-        Constructor
-        """
-        self.fields_map = {}
-        self.fields = config.FIELDS
-        self.primary = None
-
-        for field in config.FIELDS:
-            if field.primary:
-                self.primary = field
-            self.fields_map[field.name] = field
-
-    def all_fields(self):
-        """
-        List all fields
-        """
-        return self.fields
-
-    def get_field(self, field):
-        """
-        Get the real field from its name
-        """
-        if isinstance(field, (str, unicode)):
-            return self.fields_map[field]
-        return field
-    __getitem__ = get_field
-
-    def get_primary(self):
-        """
-        Get the primary field if any
-        """
-        return self.primary
-
-fieldmap = FieldMap()

fields.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-Contain the field types for SeSQL
-We cannot reuse Django types because what we need is too specific
-"""
-
-import unicodedata, locale
-from sources import guess_source, ClassSource
-import sesql_config as config
-
-import logging
-log = logging.getLogger('sesql')
-
-
-class Field(object):
-    """
-    This represent an abstract field
-    """
-    primary = False
-    slqtype = None
-    indexfunction = ""
-    placeholder = "%s"
-    
-    def __init__(self, name, source = None, sql_default = None):
-        """
-        Constructor
-        name = name for the field in our database
-        sources = list of names used in input
-        """
-        self.name = name
-        self.index_column = name
-        self.sql_default = sql_default
-        self.data_column = name
-        if not source:
-            source = name
-        self.source = guess_source(source)
-
-    def schema(self):
-        """
-        Get the field definition
-        """
-        schema = "%s %s" % (self.name, self.sqltype)
-        if self.sql_default:
-            schema += " DEFAULT %s" % self.sql_default
-        return schema + ","
-
-    def index(self, tablename):
-        """
-        Get the index defintion
-        """
-        index = "CREATE INDEX %s_%s_index ON %s " % (tablename, self.name,
-                                                     tablename)
-        if self.indexfunction:
-            index += "USING %s " % self.indexfunction
-        index = index + "(%s);" % self.index_column
-        return index
-
-    def marshall(self, value):
-        """
-        Marshall the value to SQL
-        """
-        if not value:
-            return None
-        if isinstance(value, unicode):
-            return value.encode(config.CHARSET, 'ignore')
-        return str(value)
-
-    def get_default(self, value):
-        """
-        Get the default pattern
-        """
-        return self.index_column + ' = %s', [ self.marshall(value) ]
-
-    def get_in(self, value):
-        """
-        Get the pattern for __in operator
-        """
-        if not isinstance(value, (list, tuple)):
-            raise ValueError, "__in requires a list or tuple"
-
-        value = [ self.marshall(val) for val in value ]
-        patt = [ "%s" for val in value ]
-        return self.index_column + " IN (" + ",".join(patt) + ")", value
-
-    @property
-    def index_columns(self):
-        """
-        Get the columns to populate at indexation time
-        """
-        return [ self.data_column ]
-
-    @property
-    def index_placeholders(self):
-        """
-        Get the placeholders to use at indexation time
-        """
-        return [ self.placeholder ]
-
-    def get_values(self, obj):
-        """
-        Get value(s) of this field for the object
-        """
-        return [ self.marshall(self.source.load_data(obj)) ]
-
-class IntField(Field):
-    """
-    This is a single integer field
-    """
-    sqltype = "integer"
-
-    def marshall(self, value):
-        """
-        Marshall the value to SQL
-        """
-        if value is None:
-            return None
-        return int(value)
-
-    def get_lt(self, value):
-        """
-        Get the __lt pattern
-        """
-        return self.index_column + ' < %s', [ self.marshall(value) ]
-
-    def get_lte(self, value):
-        """
-        Get the __lt pattern
-        """
-        return self.index_column + ' <= %s', [ self.marshall(value) ]
-
-    def get_gt(self, value):
-        """
-        Get the __lt pattern
-        """
-        return self.index_column + ' > %s', [ self.marshall(value) ]
-
-    def get_gte(self, value):
-        """
-        Get the __lt pattern
-        """
-        return self.index_column + ' >= %s', [ self.marshall(value) ]
-
-    def get_range(self, value):
-        """
-        Get the __range pattern
-        """
-        if not isinstance(value, (list, tuple)) or not len(value) == 2:
-            raise ValueError, "__range requires a couple as parameters"
-
-        pattern = '(%s >= %%s) AND (%s <= %%s)' % (self.index_column,
-                                                   self.index_column)
-        values = [ self.marshall(value[0]), self.marshall(value[1]) ]
-
-        return pattern, values
-
-class LongIntField(IntField):
-    """
-    This is a bigint field
-    """
-    sqltype = "bigint"
-
-class StrField(Field):
-    """
-    This is a simple string field, with specified length
-    """
-    def __init__(self, name, source = None, size = 255):
-        """
-        Constructor
-        Takes one extra paramater: the field size
-        """
-        super(StrField, self).__init__(name, source)
-        self.size = size
-        self.sqltype = "varchar(%d)" % size
-
-class ClassField(Field):
-    """
-    This is a field storing the class of the object
-    """
-    sqltype = "varchar(255)"
-    
-    def __init__(self, name):
-        """
-        Constructor
-        """
-        super(ClassField, self).__init__(name, None)
-        self.source = ClassSource()
-
-    def marshall(self, value):
-        """
-        Marshall the value to SQL
-        """
-        if hasattr(value, "__name__"):
-            value = value.__name__
-        return value
-
-class DateField(IntField):
-    """
-    This a date only field, inherit from IntField so we get all the < > <= ...
-    """
-    sqltype = "date"
-
-    def marshall(self, value):
-        """
-        Marshall a date field
-        """
-        if not value:
-            return None
-        if hasattr(value, "strftime"):
-            return value.strftime("%Y-%m-%d")
-        return value and str(value) or None
-
-class DateTimeField(DateField):
-    """
-    This a date and time field
-    """
-    sqltype = "timestamp"
-
-    def marshall(self, value):
-        """
-        Marshall a date field
-        """
-        if not value:
-            return None
-        if hasattr(value, "strftime"):
-            return value.strftime("%Y-%m-%d %H:%M:%S %z")
-        return value and str(value) or None
-
-class IntArrayField(Field):
-    """
-    This is an array of integer
-    """
-    sqltype = "integer[]"
-    indexfunction = "GIN"
-
-    def marshall(self, value):
-        """
-        Marshall the values to SQL - input must be a list or tuple
-        """
-        if not value:
-            return None
-        if isinstance(value, (list, tuple)):
-            return "{" + ",".join([ str(v) for v in value if v ]) + "}"
-        else:
-            return str(value)
-
-    def operator(self, operator, values):
-        """
-        Get a SQL expression for this operator
-        """
-        pattern = "%s %s %%s" % (self.index_column, operator)
-        values = self.marshall(values)
-        return pattern, [ values ]
-
-    def get_default(self, value):
-        """
-        Get the default pattern
-        """
-        return self.operator("@>", [ value ])
-    
-    def get_in(self, value):
-        """
-        Get the pattern for __in operator
-        """
-        raise ValueError, " __in = not supported for IntArrayField"
-
-    def get_all(self, value):
-        """
-        Get the pattern for __all operator
-        """
-        if not isinstance(value, (list, tuple)):
-            raise ValueError, "__all requires a list or tuple"
-
-        return self.operator("@>", value)
-    
-    def get_any(self, value):
-        """
-        Get the pattern for __any operator
-        """
-        if not isinstance(value, (list, tuple)):
-            raise ValueError, "__any requires a list or tuple"
-
-        return self.operator("&&", value)
-
-
-class FullTextField(Field):
-    """
-    Full text field.
-
-    For a field called ``name`` two columns are used: ``name_text`` holds
-    the marshalled text (data column) and ``name_tsv`` the tsvector
-    (index column).
-    """
-    indexfunction = "GIN"
-    # Default text search configuration, can be overridden per instance
-    # through the ``dictionnary`` constructor argument
-    dictionnary = "public.%s" % config.TS_CONFIG_NAME
-
-    def __init__(self, name, source = None, primary = False,
-                 dictionnary = None, cleanup = None):
-        """
-        Constructor
-
-        name: field name, also used as prefix of both column names
-        source: forwarded to the Field constructor
-        primary: stored as-is on the instance
-        dictionnary: text search configuration replacing the class default
-        cleanup: callable applied to the text by marshall(); when not set,
-        marshall() falls back to config.ADDITIONAL_CLEANUP_FUNCTION
-        """
-        super(FullTextField, self).__init__(name, source)
-        self.index_column = name + "_tsv"
-        self.data_column = name + "_text"
-        self.primary = primary
-        self.cleanup = cleanup
-        
-        # If dictionnary is specified, overrides default
-        if dictionnary:
-            self.dictionnary = dictionnary
-
-    def marshall(self, value, extra_letters = "", use_cleanup = True):
-        """
-        Normalize a value into lowercase ASCII text.
-
-        Steps: optional cleanup function, decoding from config.CHARSET,
-        replacement of every character that is neither a letter, a number
-        nor one of extra_letters by a space, accent stripping, lowercasing.
-
-        value: unicode or encoded string, a falsy value gives u""
-        extra_letters: additional characters to keep untouched
-        use_cleanup: set to False to skip the cleanup function
-
-        Raises ValueError if the text cannot be decoded with
-        config.CHARSET.
-        """
-        if not value:
-            return u""
-
-        # The cleanup function receives an encoded string, never unicode
-        if isinstance(value, unicode):
-            value = value.encode(config.CHARSET)
-
-        if use_cleanup:
-            # Per-field cleanup wins over the global one from the config
-            cleanup = self.cleanup or getattr(config, 'ADDITIONAL_CLEANUP_FUNCTION', None)
-            if cleanup:
-                value = cleanup(value)
-
-        # Back to unicode for the character level processing below
-        if not isinstance(value, unicode):
-            try:
-                value = value.decode(config.CHARSET)
-            except UnicodeDecodeError:
-                raise ValueError, "Can't parse %s in %s" % (value, config.CHARSET)
-
-        # Replace every character that is not a letter (category L*), a
-        # number (category N*) or an explicitly allowed one by a space
-        def isletter(c):
-            category = unicodedata.category(c)[0]
-            return category in ('L', 'N') or c in extra_letters
-        value = u''.join([ isletter(c) and c or u' ' for c in value ])
-
-        # Now strip accents: decompose, then drop whatever is not ASCII.
-        # The result may be shorter or longer than the input.
-        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
-        value = value.lower()
-        return value
-
-    def schema(self):
-        """
-        Get the field definition: a text column and a tsvector column
-        """
-        return """%s text,
-  %s tsvector,""" % (self.data_column, self.index_column)
-
-    def index(self, tablename):
-        """
-        Get the index definition: the one from the parent class, followed
-        by a statement setting the statistics target of the index column
-        to 1000
-        """
-        value = super(FullTextField, self).index(tablename)
-        value += """
-ALTER TABLE %s ALTER COLUMN %s SET STATISTICS 1000;""" % (tablename,
-                                                           self.index_column)
-        return value
-
-    def get_default(self, value):
-        """
-        Plain equality is not available on this field, always raises
-        ValueError
-        """
-        raise ValueError, " = not supported for FullTextField"
-    
-    def get_in(self, value):
-        """
-        __in is not available on this field, always raises ValueError
-        """
-        raise ValueError, " __in = not supported for FullTextField"
-
-    def pattern_contains(self, value):
-        """
-        Get the pattern for __contains* operators, in raw mode
-        (return index column, plainto_tsquery expression, parameters)
-        """
-        pattern = "plainto_tsquery('%s', %%s)" % self.dictionnary
-        values = [ self.marshall(value) ]
-
-        return self.index_column, pattern, values
-
-    def pattern_matches(self, value):
-        """
-        Get the pattern for __matches operator, in raw mode
-        (return index column, to_tsquery expression, parameters)
-        """
-        pattern = "to_tsquery('%s', %%s)" % self.dictionnary
-        # Keep the tsquery operators, marshall would turn them into spaces
-        values = [ self.marshall(value, extra_letters = '&|!()') ]
-
-        return self.index_column, pattern, values
-
-    def get_containswords(self, value):
-        """
-        Get the pattern for __containswords operator
-        (return SQL pattern, parameters)
-        """
-        column, pattern, values = self.pattern_contains(value)
-        pattern = "%s @@ %s" % (column, pattern)
-        return pattern, values
-    
-    def get_containsexact(self, value):
-        """
-        Get the pattern for __containsexact operator - can be slow.
-
-        Combines the __containswords condition with a LIKE on the data
-        column, using the marshalled value surrounded by % wildcards.
-        """
-        pattern, values = self.get_containswords(value)
-        pattern = "(%s AND %s)" % (pattern, "%s LIKE %%s" % self.data_column)
-        values = [ values[0], '%' + values[0]  + '%' ]
-
-        return pattern, values
-
-    def get_matches(self, value):
-        """
-        Get the pattern for __matches operator (PostgreSQL tsquery string)
-        """
-        column, pattern, values = self.pattern_matches(value)
-        pattern = "%s @@ %s" % (column, pattern)
-        return pattern, values
-
-    def get_like(self, value):
-        """
-        Get the pattern for __like operator - SLOW ! SLOW ! SLOW !
-
-        Runs LIKE against the data column, % is kept as wildcard.
-        """
-        pattern = "%s LIKE %%s" % (self.data_column)
-        values = [ self.marshall(value, extra_letters = '%') ]
-        return pattern, values
-
-    def rank_containswords(self, value):
-        """
-        Get the ranking pattern for __containswords operator
-        """
-        return self.pattern_contains(value)
-    
-    def rank_containsexact(self, value):
-        """
-        Get the ranking pattern for __containsexact operator: logs a
-        warning and uses the __containswords ranking instead
-        """
-        log.warning("Ranking on exact will fall back to ranking on contains")
-        return self.rank_containswords(value)
-
-    def rank_matches(self, value):
-        """
-        Get the ranking pattern for __matches operator
-        """
-        return self.pattern_matches(value)
-
-    @property
-    def index_columns(self):
-        """
-        Get the columns to populate at indexation time
-        """
-        return [ self.data_column, self.index_column ]
-
-    @property
-    def index_placeholders(self):
-        """
-        Get the placeholders to use at indexation time, one for each
-        column of index_columns.
-
-        When the source has a ``weights`` attribute, the tsvector is the
-        concatenation of one weighted to_tsvector() per weight, hence one
-        parameter per weight.
-        """
-        if hasattr(self.source, "weights"):
-            weights = self.source.weights
-            vals = []
-            for weight in weights:
-                vals.append("setweight(to_tsvector('%s', %%s), '%s')" % (self.dictionnary, weight))
-            vals = '||'.join(vals)
-        else:
-            vals = "to_tsvector('%s', %%s)" % self.dictionnary
-        return [ self.placeholder, vals ]
-
-    def get_values(self, obj):
-        """
-        Get values for the object, in the order of the parameters of
-        index_placeholders: the whole text first (data column), then
-        either one text per weight or the whole text again (tsvector)
-        """
-        vals = [ self.marshall(self.source.load_data(obj)) ]
-        if hasattr(self.source, "weights"):
-            weights = self.source.weights
-            for weight in weights:
-                vals.append(self.marshall(self.source.load_data(obj, weight)))
-        else:
-            vals.append(self.marshall(self.source.load_data(obj)))
-        return vals
-

highlight.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-from sesql.lemmatize import lemmatize
-from sesql.fieldmap import fieldmap
-import string
-
-def highlight(text, words, index = None):
-    """
-    Give the position of words in a text, cleaning everything as sesql does
-    That can be used to highlight the words, for example
-    The index will be use to lemmatize, if none, it'll use the default one
-    """
-    if not text:
-        return []
-    
-    if index is None:
-        index = fieldmap.primary
-
-    if index is None:
-        raise ValueError, "Not index given and no primary one"
-
-    size = len(text)
-    letters = set(string.ascii_letters)
-    
-    # Lemmatize the words
-    lems = lemmatize(words, index)
-
-    # Marshall everything
-    text = index.marshall(text, use_cleanup = False)
-
-    # Now find the lemmatized words inside the text
-    found = []
-    foundwords = set()
-    for i, lem in enumerate(lems):
-        if not lem:
-            continue
-        wordsize = len(lem)
-        pos = 0
-        while True:
-            begin = text.find(lem, pos)
-            if begin < 0:
-                break
-            end = begin + wordsize
-
-            # We found something, ensure it's a normal word
-            if begin and text[begin - 1] in letters:
-                pos = end
-                continue
-
-            # Now find the end of the word
-            while end < size and text[end] in letters:
-                end += 1
-
-            found.append((begin, end, i))
-            foundwords.add(text[begin:end])
-            pos = end
-
-    # Lemmatize all found words
-    foundwords = list(foundwords)
-    foundlems = lemmatize(foundwords, index)
-    foundlems = dict(zip(foundwords, foundlems))
-
-    # And now, second pass, ensure lemmatized version of word is word
-    results = []
-    for begin, end, i in found:
-        word = text[begin:end]
-        lem = foundlems[word]
-        wanted_lem = lems[i]
-        if lem == wanted_lem:
-            results.append((begin, end, i))
-
-    return results
-        
-            
-        
-    

index.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-import sesql_config as config
-from sesql.typemap import typemap
-from sesql.fieldmap import fieldmap
-from sesql import utils
-from django.db import connection
-
-import logging
-log = logging.getLogger('sesql')
-
-def index_log_wrap(function):
-    """
-    Log wrap the method, giving it a name and logging its time
-    """
-    def inner(obj, *args, **kwargs):
-        classname, objid = get_sesql_id(obj)
-        message = "%s (%s:%s)" % (function.__name__, classname, objid)
-        return utils.log_time(function, message)(obj, message, *args, **kwargs)
-    inner.__name__ = function.__name__
-    return inner
-
-def get_values(obj, fields):
-    """
-    Get SQL keys, placeholders and results for this object and those fields
-    """
-    keys = [ ]
-    placeholders = [ ]
-    results = [ ]
-
-    for field in config.FIELDS:
-        keys.extend(field.index_columns)
-        placeholders.extend(field.index_placeholders)
-        results.extend(field.get_values(obj))
-
-    return keys, placeholders, results
-
-def get_sesql_id(obj):
-    """
-    Get classname and id, the SeSQL identifiers
-    """
-    def get_val(field):
-        return fieldmap[field].get_values(obj)[0]
-    return (get_val('classname'), get_val('id'))
-
-@index_log_wrap
-def index(obj, message, noindex = False):
-    """
-    Index a Django object into SeSQL, do the real work
-    """
-    cursor = connection.cursor()
-    log.info("%s : entering" % message)
-    classname, objid = get_sesql_id(obj)
-
-    # Handle dependancies
-    gro = getattr(obj, "get_related_objects_for_indexation", None)
-    if gro:
-        related = gro()
-        nbrelated = len(related)
-        for item in related:
-            if hasattr(item, "id"):
-                # Django object ? fecth class and id
-                item = get_sesql_id(obj)
-            cursor.execute("SELECT nextval('sesql_reindex_id_seq')")
-            cursor.execute("INSERT INTO sesql_reindex_schedule (rowid, classname, objid) SELECT currval('sesql_reindex_id_seq'), %s, %s", item)
-    else:
-        nbrelated = 0        
-
-    log.info("%s : %d dependancies found" % (message, nbrelated))
-
-    table_name = typemap.get_table_for(classname)
-    if not table_name:
-        log.info("%s: no table found, skipping" % message)
-        return
-
-    query = "DELETE FROM %s WHERE id=%%s AND classname=%%s" % table_name
-    cursor.execute(query, (objid, classname))
-
-    if noindex:
-        log.info("%s : running in 'noindex' mode, only deleteing" % message)
-        return
-    
-    if config.SKIP_CONDITION and config.SKIP_CONDITION(obj):
-        log.info("%s : not indexing because of skip_condition" % message)
-        return
-    
-    log.info("%s : indexing entry in table %s" % (message, table_name))
-
-    keys, placeholders, results = get_values(obj, config.FIELDS)
-    
-    query = "INSERT INTO %s (%s) VALUES (%s)" % (table_name,
-                                                 ",".join(keys),
-                                                 ",".join(placeholders))
-    try:
-        cursor.execute(query, results)
-    except:
-        log.error('Exception caught while inserting (%s,%s) into %s' %
-                  (classname, objid, table_name))
-        raise
-    cursor.close()
-
-@index_log_wrap
-def unindex(obj, message):
-    """
-    Unindex the object
-    """
-    return index(obj, noindex = True)
-
-@index_log_wrap
-def update(obj, message, fields):
-    """
-    Update only specific fields of given object
-    """
-    log.info("%s : entering for fields %s" % (message, ','.join(fields)))
-
-    table_name = typemap.get_table_for(obj.__class__)
-    if not table_name:
-        log.info("%s : not table, skipping" % message)
-        return
-
-    fields = [ fieldmap.get_field(field) for field in fields ]
-    keys, placeholders, results = get_values(obj, fields)
-
-    pattern = [ '%s=%s' % (k,p) for k,p in zip(keys, placeholders) ]
-
-    if not pattern:
-        log.info("%s : nothing to update, skipping" % message)
-        return
-
-    pattern = ",".join(pattern)
-
-    query = "UPDATE %s SET %s WHERE classname=%%s AND id=%%s" % (table_name,
-                                                                 pattern)
-    cursor = connection.cursor()    
-    cursor.execute(query, results + [ obj.__class__.__name__, obj.id ])
-    cursor.close()
-
-    

lemmatize.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-from sesql.fieldmap import fieldmap
-from django.db import connection
-
-# Use GenericCache for now, but will probably be moved to memcached later
-from GenericCache import GenericCache
-_word_cache = GenericCache(maxsize = 10000, expiry = 86400)
-
-def lemmatize_for(words, dictionnary):
-    """
-    Lemmatize a word with given dictionnary
-    """
-    values = {}
-    remaining = []
-
-    for word in words:
-        value = _word_cache[(word, dictionnary)]
-        if value is not None:
-            values[word] = value
-        else:
-            remaining.append(word)
-
-    if remaining:
-        pattern = "plainto_tsquery('%s', %%s)" % dictionnary
-        patterns = [ pattern for word in remaining ]
-
-        cursor = connection.cursor()
-        cursor.execute('SELECT %s;' % (','.join(patterns)), remaining)
-        row = cursor.fetchone()
-        for word, value in zip(remaining, row):
-            value = value.strip("'")
-            values[word] = value
-            _word_cache[(word, dictionnary)] = value
-    
-    return [ values[word] for word in words ]    
-
-def lemmatize(words, index = None):
-    """
-    Give a lemmatized version of those words
-    
-    Use the configuration for the given index, or the default one if
-    index is None
-    """
-    if index is None:
-        index = fieldmap.primary
-
-    if index is None:
-        raise ValueError, "Not index given and no primary one"
-
-    words = [ index.marshall(word) for word in words ]
-
-    index = fieldmap.get_field(index)
-    return lemmatize_for(words, index.dictionnary)

longquery.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-from __future__ import with_statement
-
-import string, random
-from GenericCache import GenericCache
-
-import sesql_config as config
-from sesql.query import SeSQLQuery
-from sesql import utils
-
-import logging
-log = logging.getLogger('sesql')
-
-_query_cache = GenericCache(maxsize = config.QUERY_CACHE_MAX_SIZE,
-                            expiry = config.QUERY_CACHE_EXPIRY)
-
-@utils.log_time
-def longquery(query, order=None, limit=None, queryid=None, historize=False):
-    """
-    Perform a long query and return a lazy Django result set
-
-    If queryid is provided, then the query will be loaded from the
-    cache if possible, and redone else.
-
-    Be careful, if the query is redone, results may have changed.
-    """
-    if queryid:
-        with _query_cache.lock:
-            results = _query_cache[queryid]
-            if results:
-                return results
-            log.warning('Cached query id %r expired, re-querying.' % queryid)
-            
-    query = SeSQLQuery(query, order)
-    results = query.longquery(limit)
-    
-    with _query_cache.lock:
-        # Generate a new query id, ensuring it's unique
-        if not queryid:
-            while True:
-                letters = string.ascii_letters + string.digits
-                queryid = ''.join([ random.choice(letters) for i in range(32) ])
-                if queryid not in _query_cache:
-                    break
-        _query_cache[queryid] = results
-        results.queryid = queryid
-    
-    if historize: # suggest feature hook
-        results.historize(query)
-
-    return results
-    

management/__init__.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-
-"""
-This module contains a few management commands for SeSQL
-"""

management/commands/__init__.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-This module contains a few management commands for SeSQL
-"""

management/commands/build_search_query_index.py

-# -*- coding: utf-8 -*-
-
-# Copyright (c) Pilot Systems and Libération, 2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-This should be run from a cron job to process search histories and compute stats
-"""
-
-from optparse import make_option
-from datetime import datetime
-from datetime import timedelta 
-
-from django.core.management.base import BaseCommand
-
-import settings
-
-from sesql.lemmatize import lemmatize
-from sesql.models import SearchHit
-from sesql.models import SearchQuery
-from sesql.models import SearchHitHistoric
-from sesql.suggest import phonex
-
-import sesql_config as config
-
-
-class Command(BaseCommand):
-    help = """Build SearchQuery index"""
-    
-    option_list = BaseCommand.option_list + (
-        make_option('-e','--erode',
-                    action='store_true',
-                    dest='erode',
-                    help = 'tell if we must erode result or not'),
-            
-        make_option('-f','--filter',
-                    dest ='filter',
-                    type='int',
-                    default=config.HISTORY_DEFAULT_FILTER,
-                    help = 'how many time a search must occur to be treated'))
-    
-    def handle(self, *apps, **options):
-        self.process_hits(options['filter'])
-        
-        if options['erode']:
-            self.erode()
-
-    def erode(self):
-        for search_query in SearchQuery.objects.all():
-            search_query.pondered_search_nb = (config.HISTORY_ALPHA 
-                                               * search_query.pondered_search_nb 
-                                               + (1-config.HISTORY_ALPHA)
-                                               * search_query.nb_recent_search)
-            search_query.nb_recent_search = 0
-            search_query.save()
-        
-    def process_hits(self, filter_nb):
-        last_hits = SearchHit.objects.all()
-
-        processed_hits = []
-
-        for hit in last_hits:
-            query = hit.query
-            
-            # blacklist
-            if query in config.HISTORY_BLACKLIST:
-                continue
-
-            if hit.nb_results < filter_nb:
-                SearchHitHistoric(query=hit.query,
-                                  nb_results=hit.nb_results,
-                                  date=hit.date).save()
-                hit.delete()
-                continue
-            
-            # manual get_or_create
-            try:
-                search_query = SearchQuery.objects.get(query=query)
-                created = False
-            except SearchQuery.DoesNotExist:
-                search_query = SearchQuery(query=query)
-                created = True
-
-            # if it's a new one, initialize it
-            if created:
-                search_query.phonex = phonex(query)
-
-                # clean the query, the '_' char cause bugy clean_query
-                query = query.replace('_', '')  
-
-                lems = lemmatize(query.split())
-
-                clean_query = [lem for lem in lems if lem]
-                clean_query = ' '.join(clean_query)
-
-                clean_phonex = phonex(clean_query)
-
-                search_query.clean_query = clean_query
-                search_query.clean_phonex = clean_phonex
-
-                search_query.nb_total_search = 0
-                search_query.pondered_search_nb = 0
-                search_query.nb_recent_search = 0
-
-            search_query.nb_results = hit.nb_results
-            search_query.nb_total_search += 1
-
-            search_query.pondered_search_nb += 1
-            search_query.nb_recent_search += 1 
-
-            weight = (search_query.pondered_search_nb * config.HISTORY_BETA + 
-                      search_query.nb_results * config.HISTORY_GAMMA)
-            search_query.weight = weight
-            search_query.save()
-
-            # we can now create SearchHitHistoric 
-            SearchHitHistoric(query=hit.query,
-                              nb_results=hit.nb_results,
-                              date=hit.date).save()
-
-            hit.delete()

management/commands/createsesqltables.py

-# -*- coding: utf-8 -*-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-This should not be necessary since the SQL is executed in a syncdb handler; if
-you need it, this will print the SQL required to generate SeSQL tables
-"""
-
-from django.core.management.base import BaseCommand
-from django.core.management import call_command
-import settings
-from sesql import datamodel
-from sesql.typemap import typemap
-
-class Command(BaseCommand):
-    help = "Dump the commands to create SeSQL tables"
-    
-    def handle(self, *apps, **options):
-        """
-        Handle the command
-        """       
-        print "BEGIN;"
-
-        datamodel.create_dictionnary(include_drop = True)
-        datamodel.create_master_table(include_drop = True)
-        
-        for table in typemap.all_tables():
-            datamodel.create_table(table = table, include_drop = True)
-            
-        datamodel.create_schedule_table(include_drop = True)
-
-        print "COMMIT;"

management/commands/sesqlbench.py

-# -*- coding: utf-8 -*-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-This is a SeSQL simple benchmarking tool.
-"""
-
-# Allow "with" with python2.5
-from __future__ import with_statement
-
-import settings
-import sesql_config as config
-
-import sys, time, threading, random
-
-from sesql.index import index
-from sesql.shortquery import shortquery
-from sesql.longquery import longquery
-from sesql.results import SeSQLResultSet
-from sesql import utils
-from sesql.typemap import typemap
-
-from django.core.management.base import BaseCommand
-from django.core.management import call_command
-from optparse import make_option
-from django.db.models import Q
-
-
-
-class Command(BaseCommand):
-    help = "Perform benchmarks on SeSQL"
-
-    option_list = BaseCommand.option_list + (
-        make_option('-d', '--duration',
-                    dest='duration',
-                    type='float',
-                    default=30.0,
-                    help='Total test duration (default 30 s)'),
-        make_option('-f', '--queryfile',
-                    dest='queryfile',
-                    default=None,
-                    help='File containing query samples to make, one per line'),
-        make_option('--short-threads',
-                    dest='short-threads',
-                    type='int',
-                    default=1,
-                    help='Number of short query threads (default 1)'),
-        make_option('--short-delay',
-                    dest='short-delay',
-                    type='float',
-                    default=0.0,
-                    help='Delay between two short queries in each thread (default 0.0 s)'),
-        make_option('--long-threads',
-                    dest='long-threads',
-                    type='int',
-                    default=0,
-                    help='Number of long query threads (default 0)'),
-        make_option('--long-limit',
-                    dest='long-limit',
-                    type='int',
-                    default=None,
-                    help='Limit (maximum number of results) for long queries'),
-        make_option('--long-order',
-                    dest='long-order',
-                    default=None,
-                    help='Order to use for long queries'),
-        make_option('--long-delay',
-                    dest='long-delay',
-                    type='float',
-                    default=0.0,
-                    help='Delay between two long queries in each thread (default 0.0 s)'),
-        make_option('--index-threads',
-                    dest='index-threads',
-                    type='int',
-                    default=0,
-                    help='Number of re-indexing threads (default 0)'),
-        make_option('--index-delay',
-                    dest='index-delay',
-                    type='float',
-                    default=0.0,
-                    help='Delay between two re-indexing in each thread (default 0.0 s)'),
-        make_option('--index-type',
-                    dest='index-type',
-                    default=None,
-                    help='Content-type to reindex'),
-        )
-
-    def handle(self, *apps, **options):
-        """
-        Handle the command
-        """
-        self.options = options
-
-        # If we have at least a query thread, build list of queries
-        if options["long-threads"] or options["short-threads"]:
-            if not options["queryfile"]:
-                print "--queryfile is mandatory if a query thread is enabled"
-                sys.exit(1)
-            print "Loading queries list..."
-            self.queries = [ (q.strip(), eval(q)) for q in open(options["queryfile"]) if q.strip() ]
-
-        # If we have at least a reindex thread, load list of objects
-        if options["index-threads"]:
-            if not options["index-type"]:
-                print "--index-type is mandatory if a reindex thread is enabled"
-                sys.exit(1)
-            self.classname = options["index-type"]
-            self.klass = typemap.get_class_by_name(self.classname)
-            if not hasattr(self.klass, "objects"):
-                print "No such type : ", self.classname
-                sys.exit(1)
-            print "Loading object ids list..."
-            klass = typemap.get_class_by_name(options["index-type"])
-            self.allids = [ int(a['id']) for a in klass.objects.values('id') ]
-
-        # Load queries
-        self.short = []
-        self.long = []
-        self.index = []
-        self.threads = []
-        self.lock = threading.RLock()
-        self.lock.acquire()
-
-        # Starting threads
-        print "Starting threads..."
-        self.start_threads(options['short-threads'], self.handle_short,
-                           self.short, options["short-delay"])
-        self.start_threads(options['long-threads'], self.handle_long,
-                           self.long, options["long-delay"])
-        self.start_threads(options['index-threads'], self.handle_index,
-                           self.index, options["index-delay"])
-
-        # Waiting
-        print "Running benchmark..."
-        self.running = True
-        self.lock.release()
-        time.sleep(options["duration"])
-
-        # Killing threads
-        print "Killing threads..."
-        self.kill_threads()
-
-        # Display results
-        self.display_results()
-                       
-    def start_threads(self, nb, callback, store, delay):
-        """
-        Start nb threads for this activity
-        """
-        for i in range(nb):
-            thread = threading.Thread(target = self.mainloop,
-                                      args = (callback, store, delay))
-            self.threads.append((thread))
-            thread.start()
-
-    def mainloop(self, callback, store, delay):
-        """
-        Run the mainloop of each thread
-        """
-        self.lock.acquire()
-        self.lock.release()
-        timer = utils.Timer()
-        while self.running:
-            with timer:
-                name = callback()
-            store.append((timer.peek(), name))
-            time.sleep(delay)
-
-    def kill_threads(self):
-        """
-        Kill all the threads
-        """
-        self.running = False
-        for thread in self.threads:
-            thread.join()
-
-    def display_results(self):
-        """
-        Display the results
-        """
-        print ""
-        print "** Results **"
-        print ""
-        self.display_results_for("Short queries", self.short)
-        self.display_results_for("Long queries", self.long)
-        self.display_results_for("Index", self.index)
-
-    def display_results_for(self, name, values):
-        """
-        Display the results for one category
-        """
-        if values:
-            print "*", name
-
-            nb = len(values)
-            total = sum([ v[0] for v in values ])
-            average = total / nb
-
-            def print_above(values, nb, threshold):
-                count = len([ v for v in values if v[0] > threshold ])
-                print " %d above %.2f (%.2f %%)" % (count, threshold, count * 100.0 / nb)
-
-            print " %d values, average is %.3f, rate is %.2f" % (nb, average, nb / self.options["duration"])
-            print_above(values, nb, 20.0)
-            print_above(values, nb, 10.0)
-            print_above(values, nb, 5.0)
-            print_above(values, nb, 2.0)
-            print_above(values, nb, 1.0)
-            values.sort()
-            values.reverse()
-            print " top ten: "
-            for val, name in values[:10]:
-                print "  - %.2f : %s" % (val, name)
-
-    def handle_index(self):
-        """
-        Handle a reindexation
-        """
-        objid = random.choice(self.allids)
-        obj = SeSQLResultSet.load((self.classname, objid))
-        index(obj)
-        return "(%s, %s)" % (self.classname, objid)
-
-    def handle_short(self):
-        """
-        Handle a short query
-        """
-        query = random.choice(self.queries)
-        res = shortquery(query[1])
-        return query[0] + " : %d results" % len(res)
-
-    def handle_long(self):
-        """
-        Handle a long query
-        """
-        query = random.choice(self.queries)
-        res = longquery(query[1], limit = self.options["long-limit"],
-                        order = self.options["long-order"])
-        return query[0] + " : %d results" % len(res)
-        

management/commands/sesqlindex.py

-# -*- coding: utf-8 -*-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 2 of the License, or
-# (at your option) any later version.
-
-# SeSQL is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with SeSQL.  If not, see <http://www.gnu.org/licenses/>.
-
-"""
-This command will index or reindex a single object into SeSQL
-Can be used as a test, or to fix a single problem
-"""
-
-from django.core.management.base import BaseCommand
-from django.core.management import call_command
-from django.db import connection, transaction
-from django.db.models import Q
-import settings
-from sesql.results import SeSQLResultSet
-from sesql.index import index
-import sys
-
-class Command(BaseCommand):
-    help = "Index a single object into SeSQL"
-
-    @transaction.commit_manually
-    def handle(self, *apps, **options):
-        """
-        Handle the command
-        """
-        if len(apps) != 2:
-            print "Syntax : sesqlindex <classname> <objid>"
-            sys.exit(1)
-        
-        obj = SeSQLResultSet.load(apps)
-        index(obj)
-        
-        transaction.commit()
-        

management/commands/sesqllongquery.py

-# -*- coding: utf-8 -*-
-# Copyright (c) Pilot Systems and Libération, 2010-2011
-
-# This file is part of SeSQL.
-
-# SeSQL