Philip Jenvey avatar Philip Jenvey committed 117775c

hook length_hinting into map/filter/zip

Comments (0)

Files changed (5)

pypy/module/__builtin__/app_functional.py

 Plain Python definition of the builtin functions oriented towards
 functional programming.
 """
+from __future__ import with_statement
+import operator
+from __pypy__ import resizelist_hint
 
 # ____________________________________________________________
 
     if num_collections == 1:
         if none_func:
             return list(collections[0])
-        else:
-            # Special case for the really common case of a single collection,
-            # this can be eliminated if we could unroll that loop that creates
-            # `args` based on whether or not len(collections) was constant
-            result = []
-            for item in collections[0]:
+        # Special case for the really common case of a single collection,
+        # this can be eliminated if we could unroll that loop that creates
+        # `args` based on whether or not len(collections) was constant
+        seq = collections[0]
+        with _managed_newlist_hint(operator._length_hint(seq, 0)) as result:
+            for item in seq:
                 result.append(func(item))
             return result
+
+    # Gather the iterators (pair of (iter, has_finished)) and guess the
+    # result length (the max of the input lengths)
+    iterators = []
+    max_hint = 0
+    for seq in collections:
+        iterators.append((iter(seq), False))
+        max_hint = max(max_hint, operator._length_hint(seq, 0))
+
+    with _managed_newlist_hint(max_hint) as result:
+        while True:
+            cont = False
+            args = []
+            for idx, (iterator, has_finished) in enumerate(iterators):
+                val = None
+                if not has_finished:
+                    try:
+                        val = next(iterator)
+                    except StopIteration:
+                        iterators[idx] = (None, True)
+                    else:
+                        cont = True
+                args.append(val)
+            args = tuple(args)
+            if cont:
+                if none_func:
+                    result.append(args)
+                else:
+                    result.append(func(*args))
+            else:
+                return result
+
+def _newlist_hint(length_hint):
+    """Return an empty list with an underlying capacity of length_hint"""
     result = []
-    # Pair of (iterator, has_finished)
-    iterators = [(iter(seq), False) for seq in collections]
-    while True:
-        cont = False
-        args = []
-        for idx, (iterator, has_finished) in enumerate(iterators):
-            val = None
-            if not has_finished:
-                try:
-                    val = next(iterator)
-                except StopIteration:
-                    iterators[idx] = (None, True)
-                else:
-                    cont = True
-            args.append(val)
-        args = tuple(args)
-        if cont:
-            if none_func:
-                result.append(args)
-            else:
-                result.append(func(*args))
-        else:
-            return result
+    resizelist_hint(result, length_hint)
+    return result
+
+def _managed_newlist_hint(length_hint):
+    """Context manager returning a _newlist_hint upon entry.
+
+    Upon exit the list's underlying capacity will be cut back to match
+    its length if necessary (incase the initial length_hint was too
+    large).
+    """
+    # hack: can't import contextlib.contextmanager at the global level
+    from contextlib import contextmanager
+    @contextmanager
+    def _do_managed_newlist_hint(length_hint):
+        result = _newlist_hint(length_hint)
+        yield result
+        extended = len(result)
+        if extended < length_hint:
+            resizelist_hint(result, extended)
+    return _do_managed_newlist_hint(length_hint)
 
 sentinel = object()
 
         return _filter_string(func, seq, unicode)
     elif isinstance(seq, tuple):
         return _filter_tuple(func, seq)
-    result = []
-    for item in seq:
-        if func(item):
-            result.append(item)
+    with _managed_newlist_hint(operator._length_hint(seq, 0)) as result:
+        for item in seq:
+            if func(item):
+                result.append(item)
     return result
 
 def _filter_string(func, string, str_type):
     if func is bool and type(string) is str_type:
         return string
-    result = []
-    for i in range(len(string)):
+    length = len(string)
+    result = _newlist_hint(length)
+    for i in range(length):
         # You must call __getitem__ on the strings, simply iterating doesn't
         # work :/
         item = string[i]
     return str_type().join(result)
 
 def _filter_tuple(func, seq):
-    result = []
-    for i in range(len(seq)):
+    length = len(seq)
+    result = _newlist_hint(length)
+    for i in range(length):
         # Again, must call __getitem__, at least there are tests.
         item = seq[i]
         if func(item):
 in length to the length of the shortest argument sequence."""
     if not sequences:
         return []
-    result = []
-    iterators = [iter(seq) for seq in sequences]
-    while True:
-        try:
-            items = [next(it) for it in iterators]
-        except StopIteration:
-            return result
-        result.append(tuple(items))
+
+    # Gather the iterators and guess the result length (the min of the
+    # input lengths)
+    iterators = []
+    min_hint = -1
+    for seq in sequences:
+        iterators.append(iter(seq))
+        hint = operator._length_hint(seq, min_hint)
+        if min_hint == -1 or hint < min_hint:
+            min_hint = hint
+
+    with _managed_newlist_hint(min_hint) as result:
+        while True:
+            try:
+                items = [next(it) for it in iterators]
+            except StopIteration:
+                return result
+            result.append(tuple(items))

pypy/module/__pypy__/__init__.py

         'do_what_I_mean'            : 'interp_magic.do_what_I_mean',
         'list_strategy'             : 'interp_magic.list_strategy',
         'validate_fd'               : 'interp_magic.validate_fd',
+        'resizelist_hint'           : 'interp_magic.resizelist_hint',
         'newdict'                   : 'interp_dict.newdict',
         'dictstrategy'              : 'interp_dict.dictstrategy',
     }

pypy/module/__pypy__/interp_magic.py

 from pypy.interpreter.baseobjspace import ObjSpace, W_Root
 from pypy.interpreter.error import OperationError, wrap_oserror
 from pypy.interpreter.gateway import unwrap_spec
-from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.objectmodel import resizelist_hint, we_are_translated
+from pypy.objspace.std.listobject import W_ListObject
 from pypy.objspace.std.typeobject import MethodCache
 from pypy.objspace.std.mapdict import IndexCache
 from pypy.rlib import rposix
     return space.wrap(42)
 
 def list_strategy(space, w_list):
-    from pypy.objspace.std.listobject import W_ListObject
     if isinstance(w_list, W_ListObject):
         return space.wrap(w_list.strategy._applevel_repr)
     else:
         space.wrap('cp%d' % rwin32.GetConsoleCP()),
         space.wrap('cp%d' % rwin32.GetConsoleOutputCP()),
         ])
+
+@unwrap_spec(sizehint=int)
+def resizelist_hint(space, w_iterable, sizehint):
+    if not isinstance(w_iterable, W_ListObject):
+        raise OperationError(space.w_TypeError,
+                             space.wrap("arg 1 must be a 'list'"))
+    w_iterable._resize_hint(sizehint)

pypy/module/operator/__init__.py

                     'sub', 'truediv', 'truth', 'xor',
                     'iadd', 'iand', 'iconcat', 'idiv', 'ifloordiv',
                     'ilshift', 'imod', 'imul', 'ior', 'ipow', 'irepeat',
-                    'irshift', 'isub', 'itruediv', 'ixor']
+                    'irshift', 'isub', 'itruediv', 'ixor', '_length_hint']
 
     interpleveldefs = {}
 

pypy/module/operator/interp_operator.py

 from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import unwrap_spec
 
 def index(space, w_a):
     return space.index(w_a)
 
     return space.inplace_mul(w_obj1, w_obj2)
 
+# _length_hint (to be length_hint in 3.4)
+
+@unwrap_spec(default=int)
+def _length_hint(space, w_iterable, default):
+    return space.wrap(space.length_hint(w_iterable, default))
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.