Commits

Scott Wilson  committed 52898ff

Adds patch providing multi-schema support to the match spy.

  • Participants
  • Parent commits 9cb9231

Comments (0)

Files changed (2)

File multi-schema

+Add user_data arbitrary data storage to schema attributes.  Adds the ability for xapian.SchemaValueCountMatchSpy to process documents indexed with multiple schemas.
+
+diff --git a/xodb/backends/xapian/base.py b/xodb/backends/xapian/base.py
+--- a/xodb/backends/xapian/base.py
++++ b/xodb/backends/xapian/base.py
+@@ -8,7 +8,6 @@
+ import xapian
+ import itertools
+ from xapian import MatchDecider, QueryParser, Query
+-from .spies import TermCountMatchSpy
+ from cPickle import dumps, loads
+ 
+ from xodb import Schema, Backend, Attribute
+@@ -620,6 +619,7 @@
+ 
+     def term_counter(self, prefixes):
+         """Construct a term count match spy with this instance's prefix dict."""
++        from .spies import TermCountMatchSpy
+         prefix_map = self.relevance_prefixes.copy()
+         prefix_map.update(self.boolean_prefixes)
+         return TermCountMatchSpy(prefixes, prefix_map)
+diff --git a/xodb/backends/xapian/spies.py b/xodb/backends/xapian/spies.py
+--- a/xodb/backends/xapian/spies.py
++++ b/xodb/backends/xapian/spies.py
+@@ -5,6 +5,7 @@
+ from cPickle import loads
+ 
+ from xapian import MatchDecider
++from .base import XapianSchema
+ 
+ 
+ class TermCountMatchSpy(MatchDecider):
+@@ -53,9 +54,16 @@
+ 
+     :param schema: The schema that defines which values to count, and
+     whether they are atoms, sequences, or mappings.
++    Optionally a callable, called with the document to return the
++    corresponding schema.
+ 
+     :param backend: The xapian backend used to resolve value name to
+     number mapping.
++
++    Keeps track of all the attributes seen across all schemas
++    for use in display.  Attributes with the same name in different
++    schemas will be mixed up by design.  They should have the same
++    meaning / properties.
+     """
+ 
+     def __init__(self, schema, backend, facet_on=None):
+@@ -66,25 +74,51 @@
+         self.schema = schema
+         self.backend = backend
+         self.facet_on = facet_on
++        self.attribute_use_count = {}
++        """All value attributes from all schemas we've seen."""
++        self.schema_value_attributes = {}
+ 
+-        for name,attr in schema.__attributes__.items():
+-            if attr.value_field:
+-                if facet_on:
+-                    if attr.name not in facet_on:
+-                        continue
+-                self.values[name] = {}
+-
+-    def _tally(self, valname, value):
+-        if value:
+-            vd = self.values[valname]
++    def _tally(self, attr, value):
++        valname = attr.name
++        vd = self.values.setdefault(valname, dict())
++        if attr.sequence:
++            for v in value:
++                if v:
++                    vd[v] = vd.get(v, 0) + 1
++                    self.values_seen += 1
++        elif attr.mapping:
++            pass # not sure...
++        else:
+             vd[value] = vd.get(value, 0) + 1
+             self.values_seen += 1
++        count = self.attribute_use_count.setdefault(attr, 0)
++        self.attribute_use_count[attr] = count + 1
++
++    def _value_attributes(self, schema):
++        attrs = [a for a in schema.__attributes__.values() if a.value_field]
++        if self.facet_on:
++            attrs = filter(lambda a: a.name in self.facet_on, attrs)
++        return attrs
++
++    def document_value_attributes(self, doc):
++        """Return a list of schema value attributes relevant to this document.
++
++        This will get called once for each document so we keep track of schema
++        we've seen previously.
++        """
++        schema = self.schema
++        if not hasattr(schema, '__attributes__'):
++            schema = schema(doc, self.backend)
++        if not schema in self.schema_value_attributes:
++            attrs = self._value_attributes(schema)
++            self.schema_value_attributes[schema] = attrs
++        return self.schema_value_attributes[schema]
+ 
+     def __call__(self, doc):
+         self.documents_seen += 1
+ 
+-        for valname in self.values:
+-            attr = self.schema.__attributes__[valname]
++        for attr in self.document_value_attributes(doc):
++            valname = attr.name
+             valno = self.backend.values[valname]
+             value = doc.get_value(valno)
+             if not value:
+@@ -92,13 +126,7 @@
+             if not attr.sortable:
+                 value = loads(value)
+             if value:
+-                if attr.sequence:
+-                    for v in value:
+-                        self._tally(valname, v)
+-                elif attr.mapping:
+-                    pass # not sure...
+-                else:
+-                    self._tally(valname, value)
++                self._tally(attr, value)
+         return True
+ 
+     def top_values(self, valname, maxvalues=None):
+diff --git a/xodb/schema.py b/xodb/schema.py
+--- a/xodb/schema.py
++++ b/xodb/schema.py
+@@ -19,10 +19,10 @@
+     :param volatile: If true, the attribute, if present, is not stored.
+     """
+ 
+-    def __init__(self, type=None, required=True, volatile=False, 
+-                 getter=None, default=_default_marker):
++    def __init__(self, type=None, required=True, volatile=False,
++                 getter=None, default=_default_marker, **kw):
+         """A declarator for a schema attribute.
+-        
++
+         :param type: The required type, or None for no requirement.
+ 
+         :param required: True if the attribute is required on the object.
+@@ -35,9 +35,9 @@
+                        passed the arguments (name, schema, object,
+                        state).  If the getter cannot resolve the name,
+                        it should raise a LookupError.
+-                       
++
+         :param default: A default value for this attribute if it is
+-        not present.
++                        not present.
+         """
+         self.name = None # set by the schema construction
+         self.type = type
+@@ -45,6 +45,8 @@
+         self.volatile = volatile
+         self.default = default
+         self.getter = getter
++        for key, value in kw.items():
++            setattr(self, key, value)
+ 
+     def __call__(self, schema, obj, state):
+         value = None
+multi-schema
 # Placed by Bitbucket