Commits

Georg Brandl committed f16b029

Fix #705: read module source in ModuleAnalyzer in binary mode, decode afterwards.

Comments (0)

Files changed (2)

sphinx/pycode/__init__.py

 """
 
 from os import path
-from cStringIO import StringIO
 
 from sphinx.errors import PycodeError
 from sphinx.pycode import nodes
 from sphinx.pycode.pgen2 import driver, token, tokenize, parse, literals
 from sphinx.util import get_module_source, detect_encoding
-from sphinx.util.pycompat import next
+from sphinx.util.pycompat import next, StringIO, BytesIO, TextIOWrapper
 from sphinx.util.docstrings import prepare_docstring, prepare_commentdoc
 
 
 
     @classmethod
     def for_string(cls, string, modname, srcname='<string>'):
-        return cls(StringIO(string), modname, srcname)
+        if isinstance(string, bytes):
+            return cls(BytesIO(string), modname, srcname)
+        return cls(StringIO(string), modname, srcname, decoded=True)
 
     @classmethod
     def for_file(cls, filename, modname):
         if ('file', filename) in cls.cache:
             return cls.cache['file', filename]
         try:
-            fileobj = open(filename, 'r')
+            fileobj = open(filename, 'rb')
         except Exception, err:
             raise PycodeError('error opening %r' % filename, err)
         obj = cls(fileobj, modname, filename)
         cls.cache['module', modname] = obj
         return obj
 
-    def __init__(self, source, modname, srcname):
+    def __init__(self, source, modname, srcname, decoded=False):
         # name of the module
         self.modname = modname
         # name of the source file
 
         # cache the source code as well
         pos = self.source.tell()
-        self.encoding = detect_encoding(self.source.readline)
-        self.code = self.source.read()
-        self.source.seek(pos)
+        if not decoded:
+            self.encoding = detect_encoding(self.source.readline)
+            self.code = self.source.read().decode(self.encoding)
+            self.source.seek(pos)
+            self.source = TextIOWrapper(self.source, self.encoding)
+        else:
+            self.encoding = None
+            self.code = self.source.read()
+            self.source.seek(pos)
 
         # will be filled by tokenize()
         self.tokens = None

sphinx/util/pycompat.py

     bytes = bytes
     # prefix for Unicode strings
     u = ''
+    # StringIO/BytesIO classes
+    from io import StringIO, BytesIO, TextIOWrapper
     # support for running 2to3 over config files
     def convert_with_2to3(filepath):
         from lib2to3.refactor import RefactoringTool, get_fixers_from_package
     b = str
     bytes = str
     u = 'u'
+    from StringIO import StringIO
+    BytesIO = StringIO
     # no need to refactor on 2.x versions
     convert_with_2to3 = None
+    def TextIOWrapper(stream, encoding):
+        return codecs.lookup(encoding or 'ascii')[2](stream)
 
 
 # ------------------------------------------------------------------------------