# HG changeset patch # User Sergey Astanin # Date 1349914139 -7200 # Node ID 51a01cf9ad6b2cec60afb0b6a84a566252a29dfc # Parent b412b3abc1e4032fb391d67eadb141d25c65cbec update README and setup script diff --git a/README b/README --- a/README +++ b/README @@ -14,19 +14,29 @@ Usage ----- +All functions in this module return iterators, and consume input +lazily. In the examples below, the results are forced using ``list`` +and ``dict``. + +Chunks of equal size +~~~~~~~~~~~~~~~~~~~~ + To partition a sequence into chunks of equal size, use ``chop``:: >>> from split import chop >>> list(chop(3, range(10))) [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] -If ``truncate=True`` keyword argument is given, sequence length is +If ``truncate=True`` keyword argument is given, then sequence length is truncated to a multiple of chunk size, and all chunks have the same size:: >>> list(chop(3, range(10), truncate=True)) [[0, 1, 2], [3, 4, 5], [6, 7, 8]] +Subsequences by a predicate +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + To split a sequence into two by a given predicate, use ``partition``:: >>> from split import partition @@ -34,9 +44,36 @@ >>> map(list, partition(odd, range(5))) [[1, 3], [0, 2, 4]] -To break a sequence into chunks on some separator elements, use ``split``:: +For more general partitioning, use ``groupby``:: + + >>> [(k, list(i)) for k,i in groupby(lambda x: x%3, range(7))] + [(0, [0, 3, 6]), (1, [1, 4]), (2, [2, 5])] + +This function is different from ``itertools.groupby``: it returns only +one subsequence iterator per predicate value. Its return value can be +converted into a dictionary. + +When working with very long sequences, consider using +the ``predicate_values`` keyword argument to avoid scanning the entire +sequence. 
For example:: + + >>> longseq = xrange(int(1e9)) + >>> pred = lambda x: x%3 + >>> dict(groupby(pred, longseq, predicate_values=(0,1,2))) + {0: <iterator ...>, + 1: <iterator ...>, + 2: <iterator ...>} + +Breaking on separators +~~~~~~~~~~~~~~~~~~~~~~ + +To break a sequence into chunks on some separators, use ``split``. For +example, breaking on zero elements:: >>> list(split(0, [1,2,3,0,4,5,0,0,6])) [[1, 2, 3], [4, 5], [], [6]] -All functions return iterators. +You can use a function as a predicate too:: + + >>> list(split(lambda x: x==5, range(10))) + [[0, 1, 2, 3, 4], [6, 7, 8, 9]] diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ LICENSE = open("LICENSE").read() setup(name='split', - version='0.3-SNAPSHOT', + version='0.3', description='Functions to split or partition sequences.', long_description=LONG_DESCRIPTION, author='Sergey Astanin',