Commits

spirit committed 02b6a7a

Minor

  • Participants
  • Parent commits 1006f40

Comments (0)

Files changed (2)

guess_language/__init__.py

 MAX_LENGTH = 4096
 MIN_LENGTH = 20
 MAX_GRAMS = 300
-WORD_RE = re.compile(r"(?:[^\W0-9_]|['’])+", re.U)
+WORD_RE = re.compile(r"(?:[^\W\d_]|['’])+", re.U)
 MODEL_ROOT = __package__ + ".data.models."
 
 BASIC_LATIN = [
 def guess_language(text: str):
     """Return the language code, i.e. 'en'.
     """
-    words = WORD_RE.findall(text, 0, min(len(text), MAX_LENGTH))
+    words = WORD_RE.findall(text[:MAX_LENGTH])
     return identify(words, find_runs(words))
 
 

guess_language/__main__.py

 """
 
 import argparse
+import os
 import sys
 
 import guess_language
 def parse_args():
     parser = argparse.ArgumentParser(
         description=__doc__.strip(),
-        prog="{} -m {}".format(sys.executable, "guess_language")
+        prog="{} -m {}".format(os.path.basename(sys.executable),
+                               "guess_language")
     )
     parser.add_argument("--encoding", dest="encoding", help="input encoding")
     parser.add_argument("--no-enchant", dest="use_enchant",