Source

woocode / py / sa / parse_log.py

Full commit
#!/usr/bin/env python
# -*- encoding:utf-8 -*-


'''
2012-05-03 -- 2012-05-09
uid 登录总数 总挖图次数(包含成功和失败) 总打图次数(包含成功和失败)

ID IP 打图成功 打图失败 挖图成功 挖图失败

'''
import re
import os
import sys
import tarfile
import cPickle
from datetime import datetime, timedelta
from collections import defaultdict
from pprint import pprint

# Byte string that marks a failed dig ("watu") in the treasure log. The
# literal is encoded to GB18030 -- presumably the encoding of the raw log
# files; confirm against real logs.
fail_text = u'挖图失败'.encode('gb18030')

# Line classifiers for the dig ("watu") and mission ("datu") logs.
watu_fail_pat = re.compile(fail_text)
watu_succ_pat = re.compile(r'Get\s+reward')
datu_fail_pat = re.compile(r'Fail')
datu_succ_pat = re.compile(r'Complete')

# Any "[...]" field (non-greedy), used with findall() on dig log lines.
brackets_pat = re.compile(r'\[(.+?)\]')

# Field extractors; group 1 always holds the value.
ums_pat = re.compile(r'ums=(.+?),')
uid_pat = re.compile(r'uid=(\d+)')
ip_pat = re.compile(r'ip=([\d\.]+)')
date_pat = re.compile(r'\[([-\d:\s]+)\]')
key_pat = re.compile(r'key=(.+?),')
# uid following "Fail:" or "Complete:". This used to be the character class
# [(Fail)|(Complete)], which accepted any single one of those characters
# before the colon (e.g. "scale:42"); a non-capturing alternation keeps the
# uid in group 1 while matching only the two intended keywords.
mission_uid_pat = re.compile(r'(?:Fail|Complete):(\d+)')

# strptime/strftime formats at decreasing precision.
datetime_fmt = '%Y-%m-%d %H:%M:%S'
day_fmt = '%Y-%m-%d'
hour_fmt = '%Y-%m-%d %H'
minute_fmt = '%Y-%m-%d %H:%M'

# uid --> ums mapping
UID_UMS_MAP = {}
# ums --> set of uids mapping
UMS_UID_MAP = defaultdict(set)

# Login count per uid, keyed by day: {day: {uid: count}}
UID_LOGIN = {}
# Last login ip seen per ums, keyed by day: {day: {ums: ip}}
ums_login = {}

# Dig ("watu") counters: {day: {uid: {'succ': n, 'fail': n}}}
WATU = {}
# Mission ("datu") counters: {day: {uid: {'succ': n, 'fail': n}}}
DATU = {}

def load_obj(fp):
    print 'loading ', fp
    return cPickle.load(file(fp))

def tongji(server_id, start_date, end_date):
    uid_ums_map = load_obj('%s-uid_ums_map.pkl' % server_id)
    watu = load_obj('%s-watu.pkl'% server_id)
    datu = load_obj('%s-datu.pkl' % server_id)
    uid_login = load_obj('%s-uid_login.pkl' % server_id)
    ums_login = load_obj('%s-ums_login.pkl' % server_id)
    # ums_uid_map = '%s-ums_uid_map.pkl' % server_id

    date_line = '%s - %s' % (start_date, end_date)
    csv = '%s-%s-result.csv' % (server_id, date_line)


    start_date = get_dateobj_from_str(start_date, day_fmt)
    end_date = get_dateobj_from_str(end_date, day_fmt)

    def filter_by_date(dct, start, end):
        dates = dct.keys()
        for date in dates:
            dobj = get_dateobj_from_str(date, day_fmt)
            if dobj >= start_date:
                if dobj <= end_date:
                    continue
            del dct[date]
        pprint( dct.keys())
        return dct

    def get_ip_by_uid(uid):
        ip = 'unknown'
        ums = uid_ums_map.get(uid, 'unknown')
        if ums == 'unknown':
            ip = 'unknown'
        else:
            days = ums_login.keys()
            days.reverse()
            for day in days:
                if ums in ums_login[day]:
                    ip = ums_login[day][ums]
        return ip

    def parse_cangbao(dct):
        succ_count = 0
        fail_count = 0
        uid_map = {}
        for uid, info in dct.iteritems():
            succ = info.get('succ', 0)
            fail = info.get('fail', 0)
            succ_count += succ
            fail_count += fail
            uid_map[uid] = {'succ': succ, 'fail': fail}
        return succ_count, fail_count, uid_map

    print 'write content to %s' % csv

    fobj = open(csv, 'wb')

    _uid_login = filter_by_date(uid_login, start_date, end_date)
    _watu = filter_by_date(watu, start_date, end_date)
    _datu = filter_by_date(datu, start_date, end_date)

    print '----'
    print date_line
    # 时间段内总的登录总数
    login_count = 0
    uids = set([])
    for date, item in _uid_login.iteritems():
        for k, v in item.iteritems():
            uids.add(k)
            login_count += v

    watu_count = {'fail': 0, 'succ': 0}
    uid_watu_count = {}
    for date, item in _watu.iteritems():
        for uid, m in item.iteritems():
            uid_watu_count.setdefault(uid, {})
            uid_watu_count[uid].setdefault('fail', 0)
            uid_watu_count[uid].setdefault('succ', 0)
            uid_watu_count[uid]['fail'] += m['fail']
            uid_watu_count[uid]['succ'] += m['succ']
            watu_count['fail'] += m['fail']
            watu_count['succ'] += m['succ']

    uid_datu_count = {}
    datu_count = {'fail': 0, 'succ': 0}
    for date, item in _datu.iteritems():
        for uid, m in item.iteritems():
            uid_datu_count.setdefault(uid, {})
            uid_datu_count[uid].setdefault('fail', 0)
            uid_datu_count[uid].setdefault('succ', 0)
            uid_datu_count[uid]['fail'] += m['fail']
            uid_datu_count[uid]['succ'] += m['succ']
            datu_count['fail'] += m['fail']
            datu_count['succ'] += m['succ']

    cangbao_count = 0
    for key in ['fail', 'succ']:
        cangbao_count += datu_count[key] + watu_count[key]

    li2line = lambda x: ','.join(['"%s"' % l for l in x])

    fobj.write(u'日期,登录UID数,藏宝总数,挖图总数,打图总数\n'.encode('gb18030'))
    sum_line = [date_line, login_count, cangbao_count, watu_count, datu_count]
    fobj.write(li2line(sum_line) + '\n')

    fobj.write('\n')
    fobj.write(u'ID,IP,打图成功,打图失败,挖图成功,挖图失败\n'.encode('gb18030'))

    for uid in uids:
        found_uid_watu = True
        found_uid_datu = True
        # 不统计藏家寻宝活的玩家
        if uid not in uid_datu_count:
            uid_datu_count[uid] = {'fail': 0, 'succ': 0}
            found_uid_datu = False
        if uid not in uid_watu_count:
            uid_watu_count[uid] = {'fail': 0, 'succ': 0}
            found_uid_watu = False
        if not (found_uid_datu | found_uid_watu):
            continue
        line = [uid, get_ip_by_uid(uid),
                uid_datu_count[uid]['succ'], uid_datu_count[uid]['fail'],
                uid_watu_count[uid]['succ'], uid_watu_count[uid]['fail']]
        fobj.write(li2line(line) + '\n')
    fobj.close()

def dump_obj(fn, obj):
    '''Pickle obj into the file at path fn, replacing any existing file.'''
    with open(fn, 'wb') as sink:
        cPickle.dump(obj, sink)

def get_ret(reg, s, gn=1):
    '''
    Search s with the compiled pattern reg and return group gn of the first
    match, or None when the pattern does not match.
    '''
    found = reg.search(s)
    return found.group(gn) if found else None

def get_dateobj_from_str(s, fmt=datetime_fmt):
    '''
    Return s as a datetime object.

    A str is parsed with datetime.strptime using fmt; a datetime instance is
    returned unchanged; any other type raises TypeError.
    '''
    if isinstance(s, str):
        return datetime.strptime(s, fmt)
    if isinstance(s, datetime):
        return s
    raise TypeError("datetime type error")

def parse_line(line, pat=()):
    '''
    Extract fields from line using the compiled patterns in pat.

    With exactly one pattern, return every match of it (findall). Otherwise
    return one entry per pattern: group 1 of its first match, or None when
    that pattern does not match. With no patterns the result is [].

    The default is an immutable empty tuple rather than a shared mutable
    list.
    '''
    if len(pat) == 1:
        return pat[0].findall(line)
    return [get_ret(pa, line) for pa in pat]

def datetime_to_day(date, fmt=datetime_fmt):
    '''
    Return the day_fmt ('%Y-%m-%d') string for date.

    date may be a str in format fmt or a datetime object. fmt is forwarded
    to the parser; previously it was accepted but ignored, so any
    non-default format was silently parsed with datetime_fmt.
    '''
    return get_dateobj_from_str(date, fmt).strftime(day_fmt)

def parse_watu(fobj):
    '''
    挖图
    Get Reward --> 挖图成功
    战斗失败   --> 挖图失败
    '''
    def _parse(line, s='fail'):
        parts = parse_line(line, [brackets_pat])
        if len(parts) < 2 or (not all(parts)):
            print '[ERR LINE]', line
            return None
        date = parts[0]
        uid = parts[1]
        day = datetime_to_day(date)

        WATU.setdefault(day, {})
        WATU[day].setdefault(uid, {})
        WATU[day][uid].setdefault('succ', 0)
        WATU[day][uid].setdefault('fail', 0)

        WATU[day][uid][s] += 1
        return WATU[day][uid][s]

    for line in fobj:
        if watu_fail_pat.search(line):
            _parse(line, 'fail')

        elif watu_succ_pat.search(line):
            _parse(line, 'succ')

def parse_datu(fobj):
    '''
    打图
    Complete   --> 成功
    Fail       --> 失败
    '''

    def _parse(line, s='fail'):

        parts = parse_line(line, [date_pat, mission_uid_pat])
        if len(parts) < 2 or (not all(parts)):
            print '[ERR LINE]', line
            return None
        date = parts[0]
        uid = parts[1]
        day = datetime_to_day(date)

        DATU.setdefault(day, {})
        DATU[day].setdefault(uid, {})
        #DATU[day][uid].setdefault(s, 0)
        DATU[day][uid].setdefault('fail', 0)
        DATU[day][uid].setdefault('succ', 0)

        DATU[day][uid][s] += 1
        return DATU[day][uid][s]

    for line in fobj:
        if datu_fail_pat.search(line):
            _parse(line, 'fail')

        elif datu_succ_pat.search(line):
            _parse(line, 'succ')

def parse_login(fobj):
    '''
    统计 uid 登录次数,最后一次 ip
    [2012-04-26 09:04:40] uid enter game vfd=259,ums=fdg1204,uid=1067015
    [2012-04-26 09:04:39] client use loginkey ok vfd=259,ums=0,key=:1036:fdg1204:1919133540226506612:2012-04-26 09:04:38:3555,ip=113.95.229.13
    '''
    for line in fobj:
        if 'client use loginkey ok vfd=' in line:
            # 统计 最后一次 ip
            date = get_ret(date_pat, line)
            #vfd = get_ret(vfd_pat, line)
            key = get_ret(key_pat, line)
            ip = get_ret(ip_pat, line)
            if not all([date, key, ip]):
                print '[ERR LINE]', line
                continue

            date_obj = get_dateobj_from_str(date)
            day = date_obj.strftime(day_fmt)

            key_parts = key.split(':')
            # ums 只能从key的字段里获取
            ums = key_parts[2]

            # 统计 最后一次 ip
            if day not in ums_login:
                ums_login[day] = {ums: ip}
            else:
                ums_login[day][ums] = ip

        elif 'uid enter game vfd=' in line:
            date = get_ret(date_pat, line)
            ums = get_ret(ums_pat, line)
            uid = get_ret(uid_pat, line)
            if not all([date, ums, uid]):
                print '[ERR LINE]', line

            date_obj = get_dateobj_from_str(date)
            day = date_obj.strftime(day_fmt)

            UID_LOGIN.setdefault(day, {})
            UID_LOGIN[day].setdefault(uid, 0)
            UID_LOGIN[day][uid] += 1

            UMS_UID_MAP[ums].add(uid)
            UID_UMS_MAP.setdefault(uid, ums)

def get_fobj_from_tarfile(tr):
    '''
    Open the tar archive at path tr and return a file object for its first
    member.

    The TarFile handle itself is not closed here.
    '''
    archive = tarfile.open(tr)
    first_member = archive.members[0]
    return archive.extractfile(first_member)

def main():
    logdir = sys.argv[1]
    server_id = sys.argv[2]
    for fn in os.listdir(logdir):
        fp = os.path.join(logdir, fn)
        print 'parsing', fp
        fobj = get_fobj_from_tarfile(fp)
        if 'login_' in fn:
            print 'enter login logic'
            parse_login(fobj)
        elif 'cangbao_' in fn:
            print 'enter cangbao logic'
            parse_watu(fobj)
        elif 'mission_' in fn:
            print 'enter mission logic'
            parse_datu(fobj)
        fobj.close()
        del fobj

    dump_obj('%s-uid_ums_map.pkl' % server_id, UID_UMS_MAP)
    dump_obj('%s-ums_uid_map.pkl' % server_id, UMS_UID_MAP)
    dump_obj('%s-watu.pkl'% server_id, WATU)
    dump_obj('%s-datu.pkl' % server_id, DATU)
    dump_obj('%s-uid_login.pkl' % server_id, UID_LOGIN)
    dump_obj('%s-ums_login.pkl' % server_id, ums_login)

def output():
    '''
    Generate the CSV reports for the hard-coded servers and date ranges.

    Servers '1036' and '1037' get a report for every range; server '1052'
    gets a report for every range except the first.

    Reports are produced by tongji(); the previous callee, output_result, is
    not defined in this file and would raise NameError.
    '''
    date_tuple = (('2012-05-03', '2012-05-09'),
                  ('2012-05-25', '2012-05-31'),
                  ('2012-06-01', '2012-06-07'),
                  )

    server_ids = ('1036', '1037')
    for server_id in server_ids:
        for start_date, end_date in date_tuple:
            tongji(server_id, start_date, end_date)

    for start_date, end_date in date_tuple[1:]:
        tongji('1052', start_date, end_date)

# Running the file as a script performs only the parsing stage (main());
# output() is not invoked from here.
if __name__ == '__main__':
    main()