Amaury Forgeot d'Arc committed 7c3c751

"import" now compiles code from the opened stream,
and does not need to read() the whole source file first.

Comments (0)

Files changed (7)

pypy/interpreter/pycompiler.py

                                  e.wrap_info(space))
         return mod
 
+    def _compile_file_to_ast(self, stream, info):
+        space = self.space
+        try:
+            stream = pyparse.StdStream(space, stream)
+            parse_tree = self.parser.parse_file(stream, info)
+            stream.close()
+            mod = astbuilder.ast_from_node(space, parse_tree, info)
+            f_flags, future_info = future.get_futures(self.future_flags, mod)
+            info.last_future_import = future_info
+            info.flags |= f_flags
+        except parseerror.IndentationError, e:
+            raise OperationError(space.w_IndentationError,
+                                 e.wrap_info(space))
+        except parseerror.SyntaxError, e:
+            raise OperationError(space.w_SyntaxError,
+                                 e.wrap_info(space))
+        return mod
+
     def compile(self, source, filename, mode, flags, hidden_applevel=False):
         """Compile the source text `source` and return the compiled result."""
         compile_info = pyparse.CompileInfo(
             filename, mode, flags, hidden_applevel=hidden_applevel)
         tree = self._compile_to_ast(source, compile_info)
         return self._compile_ast(tree, compile_info)
+
+    def compile_file(self, stream, filename, mode, flags, hidden_applevel=False):
+        info = pyparse.CompileInfo(filename, mode, flags,
+                                   hidden_applevel=hidden_applevel)
+        mod = self._compile_file_to_ast(stream, info)
+        return self._compile_ast(mod, info)

pypy/interpreter/pyparser/pyparse.py

             return 'iso-8859-1'
     return encoding
 
-def _check_for_encoding(s1, s2):
-    eol = s1.find('\n')
+def _check_for_encoding(s):
+    # Look for a coding declaration in the first line of `s` and, if that
+    # yields nothing, in the second line.  Returns whatever
+    # _check_line_for_encoding() returns for the line examined; when `s`
+    # has no newline and its only line yields nothing, the function falls
+    # off the end and returns None.
+    eol = s.find('\n')
     if eol < 0:
-        enc = _check_line_for_encoding(s1)
+        enc = _check_line_for_encoding(s)
     else:
-        enc = _check_line_for_encoding(s1[:eol])
+        enc = _check_line_for_encoding(s[:eol])
     if enc:
         return enc
     if eol >= 0:
-        if s2:
-            s = s1 + s2
-        else:
-            s = s1
         eol2 = s.find('\n', eol + 1)
         if eol2 < 0:
             return _check_line_for_encoding(s[eol + 1:])
         return _check_line_for_encoding(s[eol + 1:eol2])
-    elif s2:
-        return _check_line_for_encoding(s2)
 
 
 def _check_line_for_encoding(line):
 
 class Stream(object):
     "Pseudo-file object used by PythonParser.parse_file"
+
+    # Subclasses must override readline(); parse_file() treats an empty
+    # result as the end of the input.
     def readline(self):
         raise NotImplementedError
-    def recode_to_utf8(self, text, encoding):
-        raise NotImplementedError
+
+    # Class-level default, used until set_encoding() is called.
+    encoding = None
+    def set_encoding(self, encoding):
+        # Called by parse_file() once the source encoding is known; it
+        # passes None when no recoding is needed.
+        self.encoding = encoding
+
+    def close(self):
+        # Nothing to release in the base class.
+        pass
+
+
+class StdStream(Stream):
+    def __init__(self, space, stream):
+        self.space = space
+        self.stream = stream
+        self.w_readline = None
+        self.w_file = None
+
+    def readline(self):
+        if not self.w_readline:
+            return self.stream.readline()
+        else:
+            w_line = self.space.call_function(self.w_readline)
+            return self.space.unicode_w(w_line).encode('utf-8')
+
+    def set_encoding(self, encoding):
+        self.encoding = encoding
+        self.w_readline = None
+        if encoding:
+            from pypy.module._codecs.interp_codecs import lookup_codec
+            from pypy.module._file import interp_file
+            space = self.space
+            w_codec_tuple = lookup_codec(space, encoding)
+            self.w_file = interp_file.from_stream(space, self.stream, 'r')
+            w_stream_reader = space.getitem(w_codec_tuple, space.wrap(2))
+            w_reader = space.call_function(w_stream_reader, self.w_file)
+            self.w_readline = space.getattr(w_reader, space.wrap('readline'))
+
+    def close(self):
+        if self.w_file:
+            self.w_file.detach()
 
 class PythonParser(parser.Parser):
 
         parser.Parser.__init__(self, grammar)
         self.space = space
 
-    def _detect_encoding(self, text1, text2, compile_info):
+    def _detect_encoding(self, text, lineno, compile_info):
         "Detect source encoding from the beginning of the file"
+        # `lineno` is the number of the line `text` starts at; the UTF-8
+        # BOM is only recognised (and stripped) when it is 1.
+        # Sets compile_info.encoding and returns `text` without the BOM.
-        if text1.startswith("\xEF\xBB\xBF"):
-            text1 = text1[3:]
+        if lineno == 1 and text.startswith("\xEF\xBB\xBF"):
+            text = text[3:]
             compile_info.encoding = 'utf-8'
             # If an encoding is explicitly given check that it is utf-8.
-            decl_enc = _check_for_encoding(text1, text2)
+            decl_enc = _check_for_encoding(text)
             if decl_enc and decl_enc != "utf-8":
                 raise error.SyntaxError("UTF-8 BOM with non-utf8 coding cookie",
                                         filename=compile_info.filename)
         elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
             compile_info.encoding = 'utf-8'
-            if _check_for_encoding(text1, text2) is not None:
+            if _check_for_encoding(text) is not None:
                 raise error.SyntaxError("coding declaration in unicode string",
                                         filename=compile_info.filename)
         else:
             compile_info.encoding = _normalize_encoding(
-                _check_for_encoding(text1, text2))
-        return text1
+                _check_for_encoding(text))
+        return text
 
     def _decode_error(self, e, compile_info):
         space = self.space
         Everything from decoding the source to tokenizing to building the parse
         tree is handled here.
         """
-        textsrc = self._detect_encoding(textsrc, None, compile_info)
+        textsrc = self._detect_encoding(textsrc, 1, compile_info)
 
         enc = compile_info.encoding
         if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
     def parse_file(self, stream, compile_info):
         assert isinstance(stream, Stream)
 
-        firstline = stream.readline()
-        secondline = None
-        if firstline:
-            secondline = stream.readline()
-            if secondline:
-                firstline = self._detect_encoding(
-                    firstline, secondline, compile_info)
-            else:
-                firstline = self._detect_encoding(
-                    firstline, '', compile_info)
+        source_lines = []
+
+        while len(source_lines) < 2:
+            line = stream.readline()
+            if not line:
+                break
+            line = self._detect_encoding(
+                line, 1, compile_info)
+            source_lines.append(line)
+            if compile_info.encoding is not None:
+                break
 
         enc = compile_info.encoding
         if enc in ('utf-8', 'iso-8859-1'):
             enc = None # No need to recode
+        stream.set_encoding(enc)
 
-        source_lines = []
-
-        if enc is None:
-            if firstline:
-                source_lines.append(firstline)
-            if secondline:
-                source_lines.append(secondline)
+        try:
             while True:
                 line = stream.readline()
                 if not line:
                     break
                 source_lines.append(line)
-        else:
-            try:
-                if firstline:
-                    source_lines.append(stream.recode_to_utf8(firstline, enc))
-                if secondline:
-                    source_lines.append(stream.recode_to_utf8(secondline, enc))
-
-                while True:
-                    line = stream.readline()
-                    if not line:
-                        break
-                    source_lines.append(stream.recode_to_utf8(line, enc))
-            except OperationError, e:
-                operror = self._decode_error(e, compile_info)
-                if operror:
-                    raise operror
-                else:
-                    raise
+        except OperationError, e:
+            operror = self._decode_error(e, compile_info)
+            if operror:
+                raise operror
+            else:
+                raise
 
         return self.build_tree(source_lines, compile_info)
 

pypy/interpreter/pyparser/test/test_pyparse.py

             def __init__(self, source):
                 self.stream = StringIO.StringIO(source)
             def readline(self):
-                return self.stream.readline()
-            def recode_to_utf8(self, line, encoding):
+                line = self.stream.readline()
+                if self.encoding is None:
+                    return line
+
                 try:
-                    if encoding is None or encoding in ('utf-8', 'iso-8859-1'):
-                        return line
-                    return line.decode(encoding).encode('utf-8')
+                    return line.decode(self.encoding).encode('utf-8')
                 except LookupError, e:
                     raise OperationError(space.w_LookupError,
                                          space.wrap(e.message))

pypy/module/_file/interp_file.py

         if stream.flushable():
             getopenstreams(self.space)[stream] = None
 
+    def detach(self):
+        stream = self.stream
+        if stream is not None:
+            self.newlines = self.stream.getnewlines()
+            self.stream = None
+            self.fd = -1
+            openstreams = getopenstreams(self.space)
+            try:
+                del openstreams[stream]
+            except KeyError:
+                pass
+        return stream
+
     def check_not_dir(self, fd):
         try:
             st = os.fstat(fd)
         self.fdopenstream(stream, fd, mode)
 
     def direct_close(self):
-        space = self.space
-        stream = self.stream
+        # detach() does the bookkeeping and returns the stream that was
+        # attached (None if there was none); only the close happens here.
+        stream = self.detach()
         if stream is not None:
-            self.newlines = self.stream.getnewlines()
-            self.stream = None
-            self.fd = -1
-            openstreams = getopenstreams(self.space)
-            try:
-                del openstreams[stream]
-            except KeyError:
-                pass
             stream.close()
 
     def direct_fileno(self):
     file.file_fdopen(fd, mode, buffering)
     return space.wrap(file)
 
+def from_stream(space, stream, mode):
+    file = W_File(space)
+    fd = stream.try_to_find_file_descriptor()
+    file.fdopenstream(stream, fd, mode)
+    return space.wrap(file)
+
 def descr_file_closed(space, file):
     # A file counts as closed once it no longer holds a stream.
     is_closed = file.stream is None
     return space.wrap(is_closed)
 
 @unwrap_spec(file=W_File, encoding="str_or_None", errors="str_or_None")
 def set_file_encoding(space, file, encoding=None, errors=None):
     file.encoding = encoding
-    file.errors = errors
+    file.errors = errors

pypy/module/imp/importing.py

         try:
             if find_info.modtype == PY_SOURCE:
                 load_source_module(space, w_modulename, w_mod, find_info.filename,
-                                   find_info.stream.readall())
+                                   find_info.stream)
                 return w_mod
             elif find_info.modtype == PY_COMPILED:
                 magic = _r_long(find_info.stream)
     pycode = ec.compiler.compile(source, pathname, 'exec', 0)
     return pycode
 
+def parse_source_file_module(space, pathname, stream):
+    """ Parse a source file and return the corresponding code object """
+    ec = space.getexecutioncontext()
+    pycode = ec.compiler.compile_file(stream, pathname, 'exec', 0)
+    return pycode
+
 def exec_code_module(space, w_mod, code_w):
     w_dict = space.getattr(w_mod, space.wrap('__dict__'))
     space.call_method(w_dict, 'setdefault',
 
 
 @jit.dont_look_inside
-def load_source_module(space, w_modulename, w_mod, pathname, source,
+def load_source_module(space, w_modulename, w_mod, pathname, source_stream,
                        write_pyc=True):
     """
     Load a source module from a given file and return its module
             stream.close()
         space.setattr(w_mod, w('__file__'), w(cpathname))
     else:
-        code_w = parse_source_module(space, pathname, source)
+        code_w = parse_source_file_module(space, pathname, source_stream)
 
         if space.config.objspace.usepycfiles and write_pyc:
             write_compiled_module(space, code_w, cpathname, mode, mtime)

pypy/module/imp/interp_imp.py

     w_mod = space.wrap(Module(space, w_modulename))
     importing._prepare_module(space, w_mod, filename, None)
 
-    importing.load_source_module(
-        space, w_modulename, w_mod, filename, stream.readall())
+    importing.load_source_module(space, w_modulename, w_mod, filename, stream)
     if space.is_w(w_file, space.w_None):
         stream.close()
     return w_mod

pypy/module/imp/test/test_import.py

                                          w_modname,
                                          w(importing.Module(space, w_modname)),
                                          filename,
-                                         stream.readall())
+                                         stream)
         finally:
             stream.close()
         if space.config.objspace.usepycfiles:
                                                  w_modulename,
                                                  w_mod,
                                                  pathname,
-                                                 stream.readall())
+                                                 stream)
         finally:
             stream.close()
         assert w_mod is w_ret
                                                  w_modulename,
                                                  w_mod,
                                                  pathname,
-                                                 stream.readall(),
+                                                 stream,
                                                  write_pyc=False)
         finally:
             stream.close()
                                                  w_modulename,
                                                  w_mod,
                                                  pathname,
-                                                 stream.readall())
+                                                 stream)
         except OperationError:
             # OperationError("Syntax Error")
             pass
                                                  w_modulename,
                                                  w_mod,
                                                  pathname,
-                                                 stream.readall())
+                                                 stream)
         except OperationError:
             # OperationError("NameError", "global name 'unknown_name' is not defined")
             pass
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.