Commits

spirit committed 7280140

console_mode

  • Participants
  • Parent commits a0f1b8d

Comments (0)

Files changed (2)

File guess_language/__main__.py

 import os
 import sys
 
-import guess_language
+import guess_language.console_mode
 
 
 def parse_args():
                                "guess_language")
     )
     parser.add_argument("file",
-                        help='plain text file or "-" for stdin')
+                        help="plain text file or “-” for stdin")
     parser.add_argument("-c", "--encoding",
                         help="input encoding")
     parser.add_argument("--disable-enchant", dest="use_enchant",

File guess_language/console_mode.py

+"""Write to stdout without causing UnicodeEncodeError
+"""
+
+import sys
+
+
+# Resolve the encoding once.  A stream may lack the attribute or expose it as
+# None; in both cases there is no encoding to transliterate towards, so the
+# stream is left untouched instead of failing on ``None.lower()`` here or on
+# the first write through the filter.
+_stdout_encoding = getattr(sys.stdout, "encoding", None) or ""
+
+# Only a strict, non-UTF stream can raise UnicodeEncodeError on write.
+if (getattr(sys.stdout, "errors", "") == "strict" and _stdout_encoding and
+        not _stdout_encoding.lower().startswith("utf")):
+    try:
+        # Use translit.StreamFilter when the translit module is importable.
+        import translit
+        sys.stdout = translit.StreamFilter(sys.stdout)
+    except ImportError:
+        import codecs
+        import unicodedata
+        import warnings
+
+        # Code points of typographic quotes mapped to plain ASCII quotes.
+        TRANSLIT_MAP = {
+            0x2018: "'",
+            0x2019: "'",
+            0x201c: '"',
+            0x201d: '"',
+        }
+
+        def simplify(s):
+            """Apply TRANSLIT_MAP, NFKD-decompose and drop combining marks.
+            """
+            decomposed = unicodedata.normalize(
+                "NFKD", s.translate(TRANSLIT_MAP))
+            kept = []
+            for char in decomposed:
+                # Combining characters have a non-zero combining class.
+                if unicodedata.combining(char):
+                    continue
+                kept.append(char)
+            return "".join(kept)
+
+        def simple_translit_error_handler(error):
+            """Codec error handler replacing unencodable text by a
+            simplified form.
+
+            Returns the ``(replacement, resume_position)`` pair for a
+            UnicodeEncodeError; any other error is re-raised.
+            """
+            if isinstance(error, UnicodeEncodeError):
+                encoding = error.encoding
+                offending = error.object[error.start:error.end]
+                simplified = simplify(offending)
+                # Whatever the target encoding still cannot represent is
+                # turned into backslash escapes by the round trip.
+                encoded = simplified.encode(encoding, "backslashreplace")
+                return encoded.decode(encoding), error.end
+            raise error
+
+        class SimpleTranslitStreamFilter:
+            """Filter a stream through simple transliteration.
+
+            Text is round-tripped through the target's encoding, using the
+            codec error handler named by ``errors``, before it is handed to
+            the wrapped stream.  Attributes not defined on the filter are
+            looked up on the wrapped stream.
+            """
+            # Name of the codec error handler used by downgrade().
+            errors = "simple_translit"
+
+            def __init__(self, target):
+                """Wrap *target*, a text stream exposing ``encoding``."""
+                self.target = target
+
+            def __getattr__(self, name):
+                # Only invoked for names missing on the filter itself, so
+                # everything else (encoding, flush, ...) comes from target.
+                return getattr(self.target, name)
+
+            def write(self, s):
+                """Write the downgraded form of *s* to the target.
+
+                Returns ``len(s)``: the whole input is consumed even when
+                the downgraded text has a different length.
+                """
+                self.target.write(self.downgrade(s))
+                return len(s)
+
+            def writelines(self, lines):
+                """Write each item of *lines* after downgrading it."""
+                self.target.writelines(
+                    [self.downgrade(line) for line in lines])
+
+            def downgrade(self, s):
+                """Return *s* restricted to what the target encoding can
+                represent, as decided by the ``errors`` handler.
+                """
+                return (s.encode(self.target.encoding, self.errors)
+                        .decode(self.target.encoding))
+
+        # Register the handler under the name the filter class passes as the
+        # ``errors`` argument of str.encode(); this must happen before the
+        # filter is used for writing.
+        codecs.register_error(SimpleTranslitStreamFilter.errors,
+                              simple_translit_error_handler)
+        # From here on, everything written to stdout goes through the filter.
+        sys.stdout = SimpleTranslitStreamFilter(sys.stdout)
+        # Runs only in the ImportError fallback path of the enclosing try.
+        warnings.warn("translit is unavailable", ImportWarning)