Matt Chaput committed 321a002

Check for ValueError in queries that call to_bytes(). Fixes issue #355

Comments (0)

Files changed (7)

src/whoosh/fields.py

         dc = self.decimal_places
         if dc and isinstance(x, (string_type, Decimal)):
             x = Decimal(x) * (10 ** dc)
-        x = self.numtype(x)
+
+        try:
+            x = self.numtype(x)
+        except OverflowError:
+            raise ValueError("Value %r overflowed number type %r"
+                             % (x, self.numtype))
 
         if x < self.min_value or x > self.max_value:
             raise ValueError("Numeric field value %s out of range [%s, %s]"

src/whoosh/query/positional.py

         # Build a list of Term queries from the words in the phrase
         reader = searcher.reader()
         for word in self.words:
-            word = field.to_bytes(word)
+            try:
+                word = field.to_bytes(word)
+            except ValueError:
+                return matching.NullMatcher()
+
             if (fieldname, word) not in reader:
                 # Shortcut the query if one of the words doesn't exist.
                 return matching.NullMatcher()

src/whoosh/query/qcore.py

             for fieldname, text in terms:
                 if (fieldname, text) in termset:
                     continue
+
                 if fieldname in schema:
                     field = schema[fieldname]
-                    btext = field.to_bytes(text)
+
+                    try:
+                        btext = field.to_bytes(text)
+                    except ValueError:
+                        continue
+
                     if (fieldname, btext) in ixreader:
                         termset.add((fieldname, btext))
         return termset

src/whoosh/query/ranges.py

         if self.start is None:
             start = b("")
         else:
-            start = field.to_bytes(self.start)
+            try:
+                start = field.to_bytes(self.start)
+            except ValueError:
+                return
+
         if self.end is None:
             end = b("\xFF\xFF\xFF\xFF")
         else:
-            end = field.to_bytes(self.end)
+            try:
+                end = field.to_bytes(self.end)
+            except ValueError:
+                return
 
         for fname, t in ixreader.terms_from(fieldname, start):
             if fname != fieldname:

src/whoosh/query/terms.py

         fieldname = self.fieldname
         if fieldname not in ixreader.schema:
             return 0
+
         field = ixreader.schema[fieldname]
-        text = field.to_bytes(self.text)
+        try:
+            text = field.to_bytes(self.text)
+        except ValueError:
+            return 0
+
         return ixreader.doc_frequency(fieldname, text)
 
     def matcher(self, searcher, context=None):
             return matching.NullMatcher()
 
         field = searcher.schema[fieldname]
-        text = field.to_bytes(text)
+        try:
+            text = field.to_bytes(text)
+        except ValueError:
+            return matching.NullMatcher()
 
         if (self.fieldname, text) in searcher.reader():
             if context is None:
         fieldname = self.fieldname
         to_bytes = ixreader.schema[fieldname].to_bytes
         for word in variations(self.text):
-            btext = to_bytes(word)
+            try:
+                btext = to_bytes(word)
+            except ValueError:
+                continue
+
             if (fieldname, btext) in ixreader:
                 yield btext
 

tests/test_collector.py

     with ix.searcher() as s:
         q = SlowQuery()
 
+        t = time.time()
         c = s.collector()
         c = collectors.TimeLimitCollector(c, 0.2)
         with pytest.raises(searching.TimeLimit):
             _ = s.search_with_collector(q, c)
+        assert time.time() - t < 0.5
 
 
 

tests/test_queries.py

     with pytest.raises(query.QueryError):
         _ = query.And([query.Term("a", "b"), None, query.Term("c", "d")])
 
+
+def test_issue_355():
+    schema = fields.Schema(seats=fields.NUMERIC(bits=8, stored=True))
+    ix = RamStorage().create_index(schema)
+    with ix.writer() as w:
+        w.add_document(seats=0)
+        w.add_document(seats=10)
+        w.add_document(seats=20)
+
+    with ix.searcher() as s:
+        # Passing a bytestring for a numeric field
+        q = Term("seats", b("maker"))
+        r1 = [hit["seats"] for hit in s.search(q, limit=5)]
+
+        # Passing a unicode string for a numeric field
+        q = Term("seats", u("maker"))
+        r2 = [hit["seats"] for hit in s.search(q, limit=5)]
+
+        # Passing a value too large for the numeric field
+        q = Term("seats", 260)
+        r3 = [hit["seats"] for hit in s.search(q, limit=5)]
+
+        assert r1 == r2 == r3 == []
+
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.