Commits

Anonymous committed 7d00b2c

added :func:`make_line_iter` and removed _ChunkIter from the multipart parser. LimitedStream is now in silent mode by default. Documented readline problems on the LimitedStream.
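
For a quick picture of the new behavior, here is a minimal sketch (assuming the `make_line_iter` and `LimitedStream` definitions added in the diff below, re-exported from the top-level `werkzeug` package as the `__init__.py` change suggests):

    from StringIO import StringIO
    from werkzeug import LimitedStream, make_line_iter

    # make_line_iter wraps a plain stream in a LimitedStream and yields
    # complete lines without ever calling the unsafe readline() method
    stream = StringIO('foo\nbar\nbaz')
    assert list(make_line_iter(stream, limit=11)) == ['foo\n', 'bar\n', 'baz']

    # LimitedStream is now silent by default: reading past the limit
    # returns an empty string instead of raising BadRequest
    stream = LimitedStream(StringIO('123456'), 3)
    assert stream.read() == '123'
    assert stream.read() == ''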

  • Parent commits f1d3a35


Files changed (7)

 - added :mod:`werkzeug.contrib.lint`
 - added `passthrough_errors` to `run_simple`.
 - added `secure_filename`
+- added :func:`make_line_iter` 
 
 Version 0.4.1
 -------------
 .. autoclass:: LimitedStream
    :members:
 
+.. autofunction:: make_line_iter
+
 .. autofunction:: get_host
 
 .. autofunction:: get_current_url

tests/test_utils.py

 def test_limited_stream():
     """Test the LimitedStream"""
     io = StringIO('123456')
-    stream = LimitedStream(io, 3)
+    stream = LimitedStream(io, 3, False)
     assert stream.read() == '123'
     assert_raises(BadRequest, stream.read)
 
     io = StringIO('123456')
-    stream = LimitedStream(io, 3)
+    stream = LimitedStream(io, 3, False)
     assert stream.read(1) == '1'
     assert stream.read(1) == '2'
     assert stream.read(1) == '3'
     io = StringIO('123456\nabcdefg')
     stream = LimitedStream(io, 9)
     assert stream.readlines(100) == ['123456\n', 'ab']
+
+    io = StringIO('123456')
+    stream = LimitedStream(io, 3)
+    assert stream.read(1) == '1'
+    assert stream.read(1) == '2'
+    assert stream.read() == '3'
+    assert stream.read() == ''
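
Not part of this commit, but a hedged sketch of an additional test that could cover the limit requirement enforced by `make_line_iter` further down in this diff (StringIO and assert_raises as already used in this module; `make_line_iter` and `LimitedStream` assumed to be importable here):

    def test_make_line_iter_requires_limit():
        """make_line_iter needs an explicit limit for unlimited streams"""
        # a plain stream without a limit is rejected with a TypeError
        io = StringIO('foo\nbar')
        assert_raises(TypeError, lambda: list(make_line_iter(io)))

        # an already wrapped LimitedStream needs no explicit limit
        io = StringIO('foo\nbar')
        assert list(make_line_iter(LimitedStream(io, 7))) == ['foo\n', 'bar']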

werkzeug/__init__.py

                              'validate_arguments', 'ArgumentValidationError',
                              'bind_arguments', 'FileWrapper', 'wrap_file',
                              'pop_path_info', 'peek_path_info',
-                             'LimitedStream', 'secure_filename'],
+                             'LimitedStream', 'make_line_iter',
+                             'secure_filename'],
     'werkzeug.datastructures': ['MultiDict', 'CombinedMultiDict', 'Headers',
                              'EnvironHeaders', 'ImmutableList',
                              'ImmutableDict', 'ImmutableMultiDict',

werkzeug/contrib/limiter.py

 
     def __init__(self, environ, limit):
         _SilentLimitedStream.__init__(self, environ, limit)
-        warn(DeprecationWarning('comtrin limited stream is deprecated, use '
+        warn(DeprecationWarning('contrib limited stream is deprecated, use '
                                 'werkzeug.LimitedStream instead.'),
              stacklevel=2)
 
 from tempfile import TemporaryFile
 from urllib2 import parse_http_list as _parse_list_header
 from datetime import datetime
+from itertools import chain, repeat
 try:
     from hashlib import md5
 except ImportError:
     in_memory = 0
 
     # convert the file into a limited stream with iteration capabilities
-    iterator = _ChunkIter(file, content_length, buffer_size)
+    file = LimitedStream(file, content_length)
+    iterator = chain(make_line_iter(file, buffer_size=buffer_size),
+                     repeat(''))
 
     try:
         terminator = iterator.next().strip()
                                                    charset, errors)))
     finally:
         # make sure the whole input stream is read
-        iterator.exhaust()
+        file.exhaust()
 
     return form, files
 
 
 
 # circular dependency fun
-from werkzeug.utils import LimitedStream, FileStorage
+from werkzeug.utils import make_line_iter, FileStorage, LimitedStream
 from werkzeug.datastructures import Headers, Accept, RequestCacheControl, \
      ResponseCacheControl, HeaderSet, ETags, Authorization, \
      WWWAuthenticate
 
 
-class _ChunkIter(LimitedStream):
-    """An iterator that yields chunks from the file.  This iterator
-    does not end!  It will happily continue yielding empty strings
-    if the limit is reached.  This is intentional.
-    """
-
-    def __init__(self, stream, limit, buffer_size):
-        LimitedStream.__init__(self, stream, limit, True)
-        self._buffer = []
-        self._buffer_size = buffer_size
-
-    def next(self):
-        if len(self._buffer) > 1:
-            return self._buffer.pop(0)
-        chunks = self.read(self._buffer_size).splitlines(True)
-        first_chunk = self._buffer and self._buffer[0] or ''
-        if chunks:
-            first_chunk += chunks.pop(0)
-        self._buffer = chunks
-        return first_chunk
-
-
 # backwards compatible imports
 from werkzeug.datastructures import MIMEAccept, CharsetAccept, LanguageAccept

werkzeug/utils.py

         raise StopIteration()
 
 
+
+def make_line_iter(stream, limit=None, buffer_size=10 * 1024):
+    """Savely iterates line-based over an input stream.  If the input stream
+    is not a :class:`LimitedStream` the `limit` parameter is mandatory.
+
+    This uses the stream's :meth:`~file.read` method internally as opposed
+    to the :meth:`~file.readline` method, which is unsafe and can only be
+    used in violation of the WSGI specification.  The same problem applies
+    to the `__iter__` method of the input stream, which calls
+    :meth:`~file.readline` without arguments.
+
+    If you need line-by-line processing it's strongly recommended to iterate
+    over the input stream using this helper function.
+
+    :param stream: the stream to iterate over.
+    :param limit: the limit in bytes for the stream.  (Usually
+                  content length.  Not necessary if the `stream`
+                  is a :class:`LimitedStream`.)
+    :param buffer_size: the optional buffer size.
+    """
+    if not isinstance(stream, LimitedStream):
+        if limit is None:
+            raise TypeError('stream not limited and no limit provided.')
+        stream = LimitedStream(stream, limit)
+    buffer = []
+    while 1:
+        if len(buffer) > 1:
+            yield buffer.pop(0)
+            continue
+        chunks = stream.read(buffer_size).splitlines(True)
+        first_chunk = buffer and buffer[0] or ''
+        if chunks:
+            first_chunk += chunks.pop(0)
+        buffer = chunks
+        if not first_chunk:
+            return
+        yield first_chunk
+
+
 class LimitedStream(object):
     """Wraps a stream so that it doesn't read more than n bytes.  If the
     stream is exhausted and the caller tries to get more bytes from it
-    :func:`on_exhausted` is called which by default raises a
-    :exc:`~werkzeug.exceptions.BadRequest`.  The return value of that
-    function is forwarded to the reader function.  So if it returns an
-    empty string :meth:`read` will return an empty string as well.
+    :func:`on_exhausted` is called which by default returns an empty
+    string or raises :exc:`~werkzeug.exceptions.BadRequest` if silent
+    is set to `False`.  The return value of that function is forwarded
+    to the reader function.  So if it returns an empty string
+    :meth:`read` will return an empty string as well.
 
     The limit however must never be higher than what the stream can
     output.  Otherwise :meth:`readlines` will try to read past the
     The `silent` parameter has no effect if :meth:`on_exhausted` is
     overridden by a subclass.
 
+    .. admonition:: Note on WSGI compliance
+
+       Calls to :meth:`readline` and :meth:`readlines` are not
+       WSGI compliant because they pass a size argument to the
+       readline methods.  Unfortunately the WSGI PEP is not safely
+       implementable without a size argument to :meth:`readline`
+       because there is no EOF marker in the stream.  As a result
+       the use of :meth:`readline` is discouraged.
+
+       For the same reason iterating over the :class:`LimitedStream`
+       is not portable.  It internally calls :meth:`readline`.
+
+       We strongly suggest using :meth:`read` only or using
+       :func:`make_line_iter` which safely iterates line-based
+       over a WSGI input stream.
+
     :param stream: the stream to wrap.
     :param limit: the limit for the stream, must not be longer than
                   what the stream can provide if the stream does not
                    past the limit and will return an empty string.
     """
 
-    def __init__(self, stream, limit, silent=False):
+    def __init__(self, stream, limit, silent=True):
         self._stream = stream
         self._pos = 0
         self.limit = limit
         return read
 
     def readline(self, size=None):
-        """Read a line from the stream.  Arguments are forwarded to the
-        `readline` function of the underlaying stream if it supports
-        them.
-        """
+        """Reads one line from the stream."""
         if self._pos >= self.limit:
             return self.on_exhausted()
         if size is None:
                           charset, errors=errors, dict_class=dict_class)
     else:
         form = dict_class()
-        stream = LimitedStream(environ['wsgi.input'], content_length,
-                               silent=True)
+        stream = LimitedStream(environ['wsgi.input'], content_length)
 
     return stream, form, dict_class(files)
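
To close the loop on the readline warning documented above, a hedged sketch of the recommended pattern for consuming a request body line by line inside a WSGI application (the environ handling here is illustrative, not taken from the commit):

    from werkzeug import make_line_iter

    def application(environ, start_response):
        # never iterate environ['wsgi.input'] directly: its readline()
        # cannot be used portably without a size argument
        limit = int(environ.get('CONTENT_LENGTH') or 0)
        lines = [line.rstrip('\r\n')
                 for line in make_line_iter(environ['wsgi.input'], limit=limit)]
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return ['received %d lines\n' % len(lines)]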