Commits

Amaury Forgeot d'Arc  committed 7c3c751

"import" now compiles code from the opened stream,
and does not need to read() the whole source file first.

  • Participants
  • Parent commits ff651a2
  • Branches compile-from-stream

Comments (0)

Files changed (7)

File pypy/interpreter/pycompiler.py

                                  e.wrap_info(space))
         return mod
 
+    def _compile_file_to_ast(self, stream, info):
+        """Parse the source read from `stream` and return its AST.
+
+        `stream` is wrapped in a pyparse.StdStream before being handed to
+        the parser.  `info` is updated in place: the flags returned by
+        future.get_futures() are OR-ed into info.flags and its second
+        result is stored in info.last_future_import.
+
+        Parser IndentationError and SyntaxError are re-raised as
+        OperationError (w_IndentationError / w_SyntaxError).
+        """
+        space = self.space
+        try:
+            std_stream = pyparse.StdStream(space, stream)
+            try:
+                parse_tree = self.parser.parse_file(std_stream, info)
+            finally:
+                # Release the wrapper even when parsing fails; otherwise a
+                # W_File created by StdStream.set_encoding() would stay
+                # attached to the caller's stream.
+                std_stream.close()
+            mod = astbuilder.ast_from_node(space, parse_tree, info)
+            f_flags, future_info = future.get_futures(self.future_flags, mod)
+            info.last_future_import = future_info
+            info.flags |= f_flags
+        except parseerror.IndentationError, e:
+            raise OperationError(space.w_IndentationError,
+                                 e.wrap_info(space))
+        except parseerror.SyntaxError, e:
+            raise OperationError(space.w_SyntaxError,
+                                 e.wrap_info(space))
+        return mod
+
     def compile(self, source, filename, mode, flags, hidden_applevel=False):
         """Compile `source` and return the result of self._compile_ast().

         A pyparse.CompileInfo built from `filename`, `mode`, `flags` and
         `hidden_applevel` is shared by the AST-building and compile steps.
         """
         compile_info = pyparse.CompileInfo(filename, mode, flags,
                                            hidden_applevel=hidden_applevel)
         tree = self._compile_to_ast(source, compile_info)
         return self._compile_ast(tree, compile_info)
+
+    def compile_file(self, stream, filename, mode, flags, hidden_applevel=False):
+        """Compile the source read from `stream`.
+
+        Builds a pyparse.CompileInfo from the remaining arguments, turns
+        the stream into an AST with _compile_file_to_ast() and returns the
+        result of self._compile_ast() for it.
+        """
+        compile_info = pyparse.CompileInfo(filename, mode, flags,
+                                           hidden_applevel=hidden_applevel)
+        tree = self._compile_file_to_ast(stream, compile_info)
+        return self._compile_ast(tree, compile_info)

File pypy/interpreter/pyparser/pyparse.py

             return 'iso-8859-1'
     return encoding
 
-def _check_for_encoding(s1, s2):
-    eol = s1.find('\n')
+def _check_for_encoding(s):
+    eol = s.find('\n')
     if eol < 0:
-        enc = _check_line_for_encoding(s1)
+        enc = _check_line_for_encoding(s)
     else:
-        enc = _check_line_for_encoding(s1[:eol])
+        enc = _check_line_for_encoding(s[:eol])
     if enc:
         return enc
     if eol >= 0:
-        if s2:
-            s = s1 + s2
-        else:
-            s = s1
         eol2 = s.find('\n', eol + 1)
         if eol2 < 0:
             return _check_line_for_encoding(s[eol + 1:])
         return _check_line_for_encoding(s[eol + 1:eol2])
-    elif s2:
-        return _check_line_for_encoding(s2)
 
 
 def _check_line_for_encoding(line):
 
 class Stream(object):
     "Pseudo-file object used by PythonParser.parse_file"
+
     def readline(self):
         raise NotImplementedError
-    def recode_to_utf8(self, text, encoding):
-        raise NotImplementedError
+
+    encoding = None
+    def set_encoding(self, encoding):
+        self.encoding = encoding
+
+    def close(self):
+        pass
+
+
+class StdStream(Stream):
+    """Stream implementation backed by an already-open stream object.
+
+    Lines come straight from the underlying stream until set_encoding()
+    installs a codec StreamReader; from then on each line is decoded by
+    that reader and returned re-encoded as UTF-8.
+    """
+
+    def __init__(self, space, stream):
+        self.space = space
+        self.stream = stream
+        # Both are filled in by set_encoding() when recoding is needed.
+        self.w_file = None
+        self.w_readline = None
+
+    def readline(self):
+        """Return the next line of source as a byte string."""
+        if self.w_readline:
+            space = self.space
+            w_line = space.call_function(self.w_readline)
+            return space.unicode_w(w_line).encode('utf-8')
+        return self.stream.readline()
+
+    def set_encoding(self, encoding):
+        """Record `encoding`; when it is non-empty, route subsequent
+        readline() calls through that codec's StreamReader."""
+        self.encoding = encoding
+        self.w_readline = None
+        if not encoding:
+            return
+        from pypy.module._codecs.interp_codecs import lookup_codec
+        from pypy.module._file import interp_file
+        space = self.space
+        w_codec_tuple = lookup_codec(space, encoding)
+        # The reader needs a file object, so wrap the stream in a W_File.
+        self.w_file = interp_file.from_stream(space, self.stream, 'r')
+        # Item 2 of the codec tuple is used as the StreamReader factory.
+        w_reader_factory = space.getitem(w_codec_tuple, space.wrap(2))
+        w_reader = space.call_function(w_reader_factory, self.w_file)
+        self.w_readline = space.getattr(w_reader, space.wrap('readline'))
+
+    def close(self):
+        """Detach the W_File created by set_encoding(), if there is one."""
+        w_file = self.w_file
+        if w_file:
+            w_file.detach()
 
 class PythonParser(parser.Parser):
 
         parser.Parser.__init__(self, grammar)
         self.space = space
 
-    def _detect_encoding(self, text1, text2, compile_info):
+    def _detect_encoding(self, text, lineno, compile_info):
         "Detect source encoding from the beginning of the file"
-        if text1.startswith("\xEF\xBB\xBF"):
-            text1 = text1[3:]
+        if lineno == 1 and text.startswith("\xEF\xBB\xBF"):
+            text = text[3:]
             compile_info.encoding = 'utf-8'
             # If an encoding is explicitly given check that it is utf-8.
-            decl_enc = _check_for_encoding(text1, text2)
+            decl_enc = _check_for_encoding(text)
             if decl_enc and decl_enc != "utf-8":
                 raise error.SyntaxError("UTF-8 BOM with non-utf8 coding cookie",
                                         filename=compile_info.filename)
         elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
             compile_info.encoding = 'utf-8'
-            if _check_for_encoding(text1, text2) is not None:
+            if _check_for_encoding(text) is not None:
                 raise error.SyntaxError("coding declaration in unicode string",
                                         filename=compile_info.filename)
         else:
             compile_info.encoding = _normalize_encoding(
-                _check_for_encoding(text1, text2))
-        return text1
+                _check_for_encoding(text))
+        return text
 
     def _decode_error(self, e, compile_info):
         space = self.space
         Everything from decoding the source to tokenizing to building the parse
         tree is handled here.
         """
-        textsrc = self._detect_encoding(textsrc, None, compile_info)
+        textsrc = self._detect_encoding(textsrc, 1, compile_info)
 
         enc = compile_info.encoding
         if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
     def parse_file(self, stream, compile_info):
+        """Read source lines from `stream` and build the parse tree.
+
+        At most the first two lines are inspected for an encoding
+        declaration.  The stream is then told which encoding to recode
+        from (None for utf-8 / iso-8859-1) and the remaining lines are
+        read.  An OperationError raised while reading is passed through
+        _decode_error(), which may supply a different error to raise.
+        """
         assert isinstance(stream, Stream)
 
-        firstline = stream.readline()
-        secondline = None
-        if firstline:
-            secondline = stream.readline()
-            if secondline:
-                firstline = self._detect_encoding(
-                    firstline, secondline, compile_info)
-            else:
-                firstline = self._detect_encoding(
-                    firstline, '', compile_info)
+        source_lines = []
+
+        while len(source_lines) < 2:
+            line = stream.readline()
+            if not line:
+                break
+            # Pass the real line number: _detect_encoding() only honours a
+            # UTF-8 BOM on line 1, so the second line must not claim to be
+            # the first one.
+            line = self._detect_encoding(
+                line, len(source_lines) + 1, compile_info)
+            source_lines.append(line)
+            if compile_info.encoding is not None:
+                break
 
         enc = compile_info.encoding
         if enc in ('utf-8', 'iso-8859-1'):
             enc = None # No need to recode
+        # NOTE(review): the lines consumed above were read before
+        # set_encoding() and are appended without recoding -- confirm
+        # that this is intended.
+        stream.set_encoding(enc)
 
-        source_lines = []
-
-        if enc is None:
-            if firstline:
-                source_lines.append(firstline)
-            if secondline:
-                source_lines.append(secondline)
+        try:
             while True:
                 line = stream.readline()
                 if not line:
                     break
                 source_lines.append(line)
-        else:
-            try:
-                if firstline:
-                    source_lines.append(stream.recode_to_utf8(firstline, enc))
-                if secondline:
-                    source_lines.append(stream.recode_to_utf8(secondline, enc))
-
-                while True:
-                    line = stream.readline()
-                    if not line:
-                        break
-                    source_lines.append(stream.recode_to_utf8(line, enc))
-            except OperationError, e:
-                operror = self._decode_error(e, compile_info)
-                if operror:
-                    raise operror
-                else:
-                    raise
+        except OperationError, e:
+            operror = self._decode_error(e, compile_info)
+            if operror:
+                raise operror
+            else:
+                raise
 
         return self.build_tree(source_lines, compile_info)
 

File pypy/interpreter/pyparser/test/test_pyparse.py

             def __init__(self, source):
                 self.stream = StringIO.StringIO(source)
             def readline(self):
-                return self.stream.readline()
-            def recode_to_utf8(self, line, encoding):
+                line = self.stream.readline()
+                if self.encoding is None:
+                    return line
+
                 try:
-                    if encoding is None or encoding in ('utf-8', 'iso-8859-1'):
-                        return line
-                    return line.decode(encoding).encode('utf-8')
+                    return line.decode(self.encoding).encode('utf-8')
                 except LookupError, e:
                     raise OperationError(space.w_LookupError,
                                          space.wrap(e.message))

File pypy/module/_file/interp_file.py

         if stream.flushable():
             getopenstreams(self.space)[stream] = None
 
+    def detach(self):
+        """Dissociate this file from its stream without closing the stream.
+
+        Saves the stream's newlines info, clears self.stream, resets
+        self.fd to -1 and removes the stream from the open-streams
+        registry if it is listed there.  Returns the detached stream, or
+        None when no stream was attached.
+        """
+        detached = self.stream
+        if detached is None:
+            return None
+        self.newlines = detached.getnewlines()
+        self.stream = None
+        self.fd = -1
+        registry = getopenstreams(self.space)
+        try:
+            del registry[detached]
+        except KeyError:
+            # The stream was never registered; nothing to forget.
+            pass
+        return detached
+
     def check_not_dir(self, fd):
         try:
             st = os.fstat(fd)
         self.fdopenstream(stream, fd, mode)
 
     def direct_close(self):
-        space = self.space
-        stream = self.stream
+        stream = self.detach()
         if stream is not None:
-            self.newlines = self.stream.getnewlines()
-            self.stream = None
-            self.fd = -1
-            openstreams = getopenstreams(self.space)
-            try:
-                del openstreams[stream]
-            except KeyError:
-                pass
             stream.close()
 
     def direct_fileno(self):
     file.file_fdopen(fd, mode, buffering)
     return space.wrap(file)
 
+def from_stream(space, stream, mode):
+    """Create a W_File on top of an already-open `stream`; return it wrapped.
+
+    The file descriptor handed to fdopenstream() is whatever
+    stream.try_to_find_file_descriptor() reports.
+    """
+    w_file = W_File(space)
+    descriptor = stream.try_to_find_file_descriptor()
+    w_file.fdopenstream(stream, descriptor, mode)
+    return space.wrap(w_file)
+
 def descr_file_closed(space, file):
     """Return a wrapped boolean that is true when `file` has no stream."""
     is_closed = file.stream is None
     return space.wrap(is_closed)
 
 @unwrap_spec(file=W_File, encoding="str_or_None", errors="str_or_None")
 def set_file_encoding(space, file, encoding=None, errors=None):
     file.encoding = encoding
-    file.errors = errors
+    file.errors = errors

File pypy/module/imp/importing.py

         try:
             if find_info.modtype == PY_SOURCE:
                 load_source_module(space, w_modulename, w_mod, find_info.filename,
-                                   find_info.stream.readall())
+                                   find_info.stream)
                 return w_mod
             elif find_info.modtype == PY_COMPILED:
                 magic = _r_long(find_info.stream)
     pycode = ec.compiler.compile(source, pathname, 'exec', 0)
     return pycode
 
+def parse_source_file_module(space, pathname, stream):
+    """Compile the source read from `stream` in 'exec' mode with flags 0.
+
+    `pathname` is passed to the compiler as the file name.  Returns the
+    resulting code object.
+    """
+    compiler = space.getexecutioncontext().compiler
+    return compiler.compile_file(stream, pathname, 'exec', 0)
+
 def exec_code_module(space, w_mod, code_w):
     w_dict = space.getattr(w_mod, space.wrap('__dict__'))
     space.call_method(w_dict, 'setdefault',
 
 
 @jit.dont_look_inside
-def load_source_module(space, w_modulename, w_mod, pathname, source,
+def load_source_module(space, w_modulename, w_mod, pathname, source_stream,
                        write_pyc=True):
     """
     Load a source module from a given file and return its module
             stream.close()
         space.setattr(w_mod, w('__file__'), w(cpathname))
     else:
-        code_w = parse_source_module(space, pathname, source)
+        code_w = parse_source_file_module(space, pathname, source_stream)
 
         if space.config.objspace.usepycfiles and write_pyc:
             write_compiled_module(space, code_w, cpathname, mode, mtime)

File pypy/module/imp/interp_imp.py

     w_mod = space.wrap(Module(space, w_modulename))
     importing._prepare_module(space, w_mod, filename, None)
 
-    importing.load_source_module(
-        space, w_modulename, w_mod, filename, stream.readall())
+    importing.load_source_module(space, w_modulename, w_mod, filename, stream)
     if space.is_w(w_file, space.w_None):
         stream.close()
     return w_mod

File pypy/module/imp/test/test_import.py

                                          w_modname,
                                          w(importing.Module(space, w_modname)),
                                          filename,
-                                         stream.readall())
+                                         stream)
         finally:
             stream.close()
         if space.config.objspace.usepycfiles:
                                                  w_modulename,
                                                  w_mod,
                                                  pathname,
-                                                 stream.readall())
+                                                 stream)
         finally:
             stream.close()
         assert w_mod is w_ret
                                                  w_modulename,
                                                  w_mod,
                                                  pathname,
-                                                 stream.readall(),
+                                                 stream,
                                                  write_pyc=False)
         finally:
             stream.close()
                                                  w_modulename,
                                                  w_mod,
                                                  pathname,
-                                                 stream.readall())
+                                                 stream)
         except OperationError:
             # OperationError("Syntax Error")
             pass
                                                  w_modulename,
                                                  w_mod,
                                                  pathname,
-                                                 stream.readall())
+                                                 stream)
         except OperationError:
             # OperationError("NameError", "global name 'unknown_name' is not defined")
             pass