# woocode / py / sa / statistics /

# -*- encoding:utf-8 -*-
import os
import re
import tarfile
from urlparse import urlparse
from time import time
from glob import fnmatch

def get_files_from_dir(path, fn_pat):
    """Return a lazy iterable of the entries of ``path`` matching ``fn_pat``.

    Only the immediate entries of ``path`` are examined (no recursion); each
    result is ``os.path.join(path, name)``.

    :param path: directory to list.
    :param fn_pat: either a ``str`` -- a name matches when it contains
        ``fn_pat`` as a substring or matches it as an ``fnmatch`` glob -- or
        a compiled regular expression, applied to the name with ``search()``.
    :returns: a generator of joined paths. The directory is listed when this
        function is called; the filtering happens while iterating.
    :raises TypeError: if ``fn_pat`` is neither a ``str`` nor a compiled
        regular expression.
    """
    # The compiled-pattern class has no public name on Python 2, so derive it.
    pattern_type = type(re.compile('foo'))

    if isinstance(fn_pat, str):
        def matches(name):
            return fn_pat in name or fnmatch.fnmatch(name, fn_pat)
    elif isinstance(fn_pat, pattern_type):
        def matches(name):
            return fn_pat.search(name) is not None
    else:
        raise TypeError("<fn_path> only support regexp or str, got: %r" % repr(type(fn_pat)))

    return (os.path.join(path, name) for name in os.listdir(path)
            if matches(name))

def dump_obj(fn, obj):
    """Pickle ``obj`` into the file at path ``fn``.

    The file is opened in binary write mode, so an existing file is
    overwritten. The default pickle protocol is used.
    """
    import pickle
    with open(fn, 'wb') as sink:
        pickle.Pickler(sink).dump(obj)

def timeit(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapper forwards all positional and keyword arguments, returns the
    wrapped function's result unchanged, and prints ``Cost <n> secs`` (two
    decimals, measured with ``time()``) after the call returns. Nothing is
    printed if ``func`` raises.

    :param func: the callable to time.
    :returns: a wrapper that keeps ``func``'s name and docstring.
    """
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time()
        ret = func(*args, **kwargs)
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('Cost %.2f secs' % (time() - start_time))
        return ret
    return wrapper

def query_to_dict(query):
    """Split a URL query string into a ``{name: value}`` dict.

    No percent-decoding is performed; names and values are returned exactly
    as written. Each ``&``-separated field is split at its first ``=`` only,
    so a value may itself contain ``=``. Empty fields (and therefore an empty
    query string) are skipped, and a field without ``=`` maps to ``''``.
    When a name is repeated, the last occurrence wins.

    >>> query_to_dict('a=a&b=b') == {'a': 'a', 'b': 'b'}
    True
    >>> query_to_dict('a=a&b=b&c=') == {'a': 'a', 'b': 'b', 'c': ''}
    True
    >>> query_to_dict('a=a%XX&b=b&c=c') == {'a': 'a%XX', 'b': 'b', 'c': 'c'}
    True
    >>> query_to_dict('')
    {}
    >>> query_to_dict('token=abc==') == {'token': 'abc=='}
    True

    :param query: the query part of a URL, without the leading ``?``.
    :returns: dict mapping each field name to its raw string value.
    """
    result = {}
    for field in query.split('&'):
        if not field:
            # Produced by '', a trailing '&' or '&&'; carries no data.
            continue
        name, _, value = field.partition('=')
        result[name] = value
    return result

def parse_request_url(url_str):
    """Extract the query parameters from a request line.

    ``url_str`` is split on whitespace into exactly three fields (method,
    URL, protocol); only the URL is used. Its query string is parsed with
    ``query_to_dict`` and every value made up solely of digits is converted
    to ``int``; all other values stay strings.

    :param url_str: request line such as ``'GET /p?a=1&b=x HTTP/1.1'``.
    :returns: dict of query parameter names to ``int`` or ``str`` values.
    :raises ValueError: if ``url_str`` does not split into exactly three
        whitespace-separated fields.
    """
    # Unpacking into three names keeps the "exactly three fields" check even
    # though the method and protocol are not used.
    _method, url, _protocol = url_str.split()
    # ``urlparse`` is imported as the function itself
    # (``from urlparse import urlparse``), so it is called directly.
    params = query_to_dict(urlparse(url).query)
    # Snapshot the items so values can be replaced while looping.
    for key, value in list(params.items()):
        if value.isdigit():
            params[key] = int(value)
    return params

def parse_non_digit(s, default=0, keep_postive=True):
    """Coerce ``s`` to an integer, tolerating non-numeric input.

    * an ``int`` is used as is;
    * a ``str`` is parsed with ``int()``; if that fails, ``default`` is used;
    * any other type yields ``0``.

    When ``keep_postive`` is true, a result that is not greater than zero is
    replaced by ``0``.

    >>> parse_non_digit('yu')
    0
    >>> parse_non_digit('-11')
    0
    >>> parse_non_digit('-11', False)
    0
    >>> parse_non_digit('-11', keep_postive=False)
    -11
    >>> parse_non_digit('')
    0
    >>> parse_non_digit(0)
    0
    >>> parse_non_digit(1)
    1
    >>> parse_non_digit(32321)
    32321

    :param s: value to convert.
    :param default: value used when ``s`` is a string that ``int()`` rejects.
    :param keep_postive: clamp non-positive results to ``0`` when true.
    :returns: the resulting integer (or ``default``, subject to clamping).
    """
    ret = 0
    if isinstance(s, int):
        ret = s
    elif isinstance(s, str):
        try:
            ret = int(s)
        except ValueError:
            ret = default
    if keep_postive:
        ret = ret if ret > 0 else 0
    return ret

def get_files_by_ext(path, ext):
    """Lazily yield the joined path of each entry of ``path`` ending in ``ext``.

    This is a generator: the directory is not listed until iteration starts.
    Only the immediate entries of ``path`` are considered, and ``ext`` is
    compared with ``str.endswith``.
    """
    for entry in os.listdir(path):
        if not entry.endswith(ext):
            continue
        yield os.path.join(path, entry)

class LineGenerator(object):
    """Iterate over the lines of a file with surrounding whitespace stripped."""

    def __init__(self, fobj):
        """Wrap an open file-like object or open the file at a path.

        :param fobj: a ``str`` path, opened with ``open()`` in the default
            mode, or a file-like object (anything exposing ``read``, such as
            a regular file or the object returned by
            ``TarFile.extractfile``), which is used as is.
        :raises TypeError: if ``fobj`` is neither of the above.
        """
        if isinstance(fobj, str):
            self.fobj = open(fobj)
        elif hasattr(fobj, 'read'):
            # Duck-typed check: the ``file`` builtin only exists on Python 2.
            self.fobj = fobj
        else:
            raise TypeError('fobj only support file object or file path. Got: %r' % type(fobj))

    def get_lines(self):
        """Yield each line of the underlying file, stripped of whitespace."""
        for line in self.fobj:
            yield line.strip()

def get_logobj_from_tarfile(filename, mem_name, generator):
    """Yield ``generator(fileobj)`` for every archive member named ``mem_name``.

    A member matches when the base name of its path inside the archive
    equals ``mem_name``, whatever directory it sits in. Members that
    ``extractfile`` cannot open as a file (it returns ``None`` for them,
    e.g. directories) are skipped.

    :param filename: path of the tar archive, opened with ``tarfile.open``.
    :param mem_name: base name of the members to extract.
    :param generator: callable applied to each extracted file object; its
        return value is what gets yielded.
    :returns: a generator of ``generator(fileobj)`` results, in archive order.
    """
    # NOTE: the archive is deliberately not closed here -- the yielded
    # objects may still read from it after this generator has finished.
    tf = tarfile.open(filename)
    # getmembers() loads the whole member list; the ``members`` attribute
    # only holds the entries that have been read so far.
    for member in tf.getmembers():
        if os.path.basename(member.name) != mem_name:
            continue
        extracted = tf.extractfile(member)
        if extracted is None:
            continue
        yield generator(extracted)