Commits

Anonymous committed 27ab295

Use "utf-8" consistently as the encoder/decoder name (not "utf8")

  • Participants
  • Parent commits b17ebe6

Comments (0)

Files changed (5)

File benchmark/marc21.py

 
 
 def uni(v):
-    return u"" if v is None else v.decode("utf8", "replace")
+    return u"" if v is None else v.decode("utf-8", "replace")
 
 
 # Indexing and searching
                    glob=options.glob)
 
     if args:
-        qstring = " ".join(args).decode("utf8")
+        qstring = " ".join(args).decode("utf-8")
         limit = int(options.limit)
         if limit < 1:
             limit = None

File src/whoosh/codec/whoosh2.py

         self.text = text
         self.terminfo = FileTermInfo()
         if self.spelling:
-            self.dawg.insert(text.decode("utf8"))  # TODO: how to decode bytes?
+            self.dawg.insert(text.decode("utf-8"))  # TODO: how to decode bytes?
         self._start_blocklist()
 
     def add(self, docnum, weight, valuestring, length):

File src/whoosh/support/bench.py

                                           schema=ix.schema)
 
     def query(self):
-        qstring = " ".join(self.args).decode("utf8")
+        qstring = " ".join(self.args).decode("utf-8")
         return self.parser.parse(qstring)
 
     def find(self, q):

File src/whoosh/util/text.py

     the prefix it shares with a, followed by the suffix encoded as UTF-8.
     """
     i = first_diff(a, b)
-    return chr(i) + b[i:].encode("utf8")
+    return chr(i) + b[i:].encode("utf-8")
 
 
 def prefix_encode_all(ls):
     last = u('')
     for w in ls:
         i = first_diff(last, w)
-        yield chr(i) + w[i:].encode("utf8")
+        yield chr(i) + w[i:].encode("utf-8")
         last = w
 
 
     last = u('')
     for w in ls:
         i = ord(w[0])
-        decoded = last[:i] + w[1:].decode("utf8")
+        decoded = last[:i] + w[1:].decode("utf-8")
         yield decoded
         last = decoded
 

File tests/test_codecs.py

         a = array("H", (random.randint(0, 0xd7ff) for _ in xrange(1, 20)))
         return array_tobytes(a).decode("utf-16")
 
-    domain = sorted(set([(random_fieldname(), random_btext().encode("utf8"))
+    domain = sorted(set([(random_fieldname(), random_btext().encode("utf-8"))
                          for _ in xrange(1000)]))
 
     st, codec, seg = _make_codec()