Commits

yang xiaoyong  committed b0be332 Draft

重命名

  • Participants
  • Parent commits 9fde777

Comments (0)

Files changed (2)

File py/sa/parse_canbao.py

-#!/usr/bin/env python
-# -*- encoding:utf-8 -*-
-
-
-'''
-2012-05-03 -- 2012-05-09
-uid 登录总数 总挖图次数(包含成功和失败) 总打图次数(包含成功和失败)
-
-ID IP 打图成功 打图失败 挖图成功 挖图失败
-
-'''
-import re
-import os
-import sys
-import tarfile
-import cPickle
-import optparse
-from datetime import datetime, timedelta
-from collections import defaultdict
-from pprint import pprint
-
-fail_text = u'战斗失败'.encode('gb18030')
-
-watu_fail_pat = re.compile(fail_text)
-watu_succ_pat = re.compile(r'Get\s+reward')
-datu_fail_pat = re.compile(r'Fail:\d+\s+[\'"]?cg_cbt')
-datu_succ_pat = re.compile(r'Complete:\d+\s+[\'"]?cg_cbt')
-
-brackets_pat = re.compile(r'\[(.+?)\]')
-
-ums_pat = re.compile(r'ums=(.+?),')
-uid_pat = re.compile(r'uid=(\d+)')
-ip_pat = re.compile(r'ip=([\d\.]+)')
-date_pat = re.compile(r'\[([-\d:\s]+)\]')
-key_pat = re.compile(r'key=(.+?),')
-mission_uid_pat = re.compile(r'[(Fail)|(Complete)]:(\d+)')
-
-datetime_fmt = '%Y-%m-%d %H:%M:%S'
-day_fmt = '%Y-%m-%d'
-hour_fmt = '%Y-%m-%d %H'
-minute_fmt = '%Y-%m-%d %H:%M'
-
-# uid --> ums 的映射
-UID_UMS_MAP = {}
-# ums --> uid 的映射
-UMS_UID_MAP = defaultdict(set)
-
-# 统计所有成功uid登录次数
-UID_LOGIN = {}
-# 统计最后一次 ums 的登录ip
-ums_login = {}
-
-# 挖图信息
-WATU = {}
-# 打图信息
-DATU = {}
-
-def load_obj(fp):
-    # print 'loading ', fp
-    return cPickle.load(file(fp))
-
-def tongji(server_id, start_date, end_date):
-    uid_ums_map = load_obj('%s-uid_ums_map.pkl' % server_id)
-    watu = load_obj('%s-watu.pkl'% server_id)
-    datu = load_obj('%s-datu.pkl' % server_id)
-    uid_login = load_obj('%s-uid_login.pkl' % server_id)
-    ums_login = load_obj('%s-ums_login.pkl' % server_id)
-    # ums_uid_map = '%s-ums_uid_map.pkl' % server_id
-
-    date_line = '%s - %s' % (start_date, end_date)
-    csv = '%s-%s-result.csv' % (server_id, date_line)
-
-
-    start_date = get_dateobj_from_str(start_date, day_fmt)
-    end_date = get_dateobj_from_str(end_date, day_fmt)
-
-    def filter_by_date(dct, start, end):
-        dates = dct.keys()
-        for date in dates:
-            dobj = get_dateobj_from_str(date, day_fmt)
-            if dobj >= start_date:
-                if dobj < (end_date + timedelta(1)):
-                    continue
-            del dct[date]
-        return dct
-
-    def get_ip_by_uid(uid):
-        ip = 'unknown'
-        ums = uid_ums_map.get(uid, 'unknown')
-        if ums == 'unknown':
-            ip = 'unknown'
-        else:
-            days = ums_login.keys()
-            days.reverse()
-            for day in days:
-                if ums in ums_login[day]:
-                    ip = ums_login[day][ums]
-        return ip
-
-    def parse_cangbao(dct):
-        succ_count = 0
-        fail_count = 0
-        uid_map = {}
-        for uid, info in dct.iteritems():
-            succ = info.get('succ', 0)
-            fail = info.get('fail', 0)
-            succ_count += succ
-            fail_count += fail
-            uid_map[uid] = {'succ': succ, 'fail': fail}
-        return succ_count, fail_count, uid_map
-
-    fobj = open(csv, 'wb')
-
-    _uid_login = filter_by_date(uid_login, start_date, end_date)
-
-    _watu = filter_by_date(watu, start_date, end_date)
-    _datu = filter_by_date(datu, start_date, end_date)
-
-    print '----'
-    print date_line
-    # 时间段内总的登录总数
-    login_count = 0
-    uids = set([])
-    # 统计有效的 uid 数量
-    for date, item in _uid_login.iteritems():
-        for k, v in item.iteritems():
-            uids.add(k)
-            login_count += v
-
-    print '%s %s --> %d' % (server_id, date_line, len(uids))
-    watu_count = {'fail': 0, 'succ': 0}
-    uid_watu_count = {}
-    for date, item in _watu.iteritems():
-        for uid, m in item.iteritems():
-            uid_watu_count.setdefault(uid, {})
-            uid_watu_count[uid].setdefault('fail', 0)
-            uid_watu_count[uid].setdefault('succ', 0)
-            uid_watu_count[uid]['fail'] += m['fail']
-            uid_watu_count[uid]['succ'] += m['succ']
-            watu_count['fail'] += m['fail']
-            watu_count['succ'] += m['succ']
-
-    uid_datu_count = {}
-    datu_count = {'fail': 0, 'succ': 0}
-    for date, item in _datu.iteritems():
-        for uid, m in item.iteritems():
-            uid_datu_count.setdefault(uid, {})
-            uid_datu_count[uid].setdefault('fail', 0)
-            uid_datu_count[uid].setdefault('succ', 0)
-            uid_datu_count[uid]['fail'] += m['fail']
-            uid_datu_count[uid]['succ'] += m['succ']
-            datu_count['fail'] += m['fail']
-            datu_count['succ'] += m['succ']
-
-    cangbao_count = 0
-    for key in ['fail', 'succ']:
-        cangbao_count += datu_count[key] + watu_count[key]
-
-    li2line = lambda x: ','.join(['"%s"' % l for l in x])
-
-    fobj.write(u'日期,登录UID数,藏宝总数,挖图总数,打图总数\n'.encode('gb18030'))
-    sum_line = [date_line, login_count, cangbao_count, watu_count, datu_count]
-    fobj.write(li2line(sum_line) + '\n')
-
-    fobj.write('\n')
-    fobj.write(u'ID,IP,打图成功,打图失败,挖图成功,挖图失败\n'.encode('gb18030'))
-
-    for uid in uids:
-        found_uid_watu = True
-        found_uid_datu = True
-        # 不统计藏家寻宝活的玩家
-        if uid not in uid_datu_count:
-            uid_datu_count[uid] = {'fail': 0, 'succ': 0}
-            found_uid_datu = False
-        if uid not in uid_watu_count:
-            uid_watu_count[uid] = {'fail': 0, 'succ': 0}
-            found_uid_watu = False
-        if not (found_uid_datu | found_uid_watu):
-            continue
-        line = [uid, get_ip_by_uid(uid),
-                uid_datu_count[uid]['succ'], uid_datu_count[uid]['fail'],
-                uid_watu_count[uid]['succ'], uid_watu_count[uid]['fail']]
-        fobj.write(li2line(line) + '\n')
-    fobj.close()
-
-def dump_obj(fn, obj):
-    with open(fn, 'wb') as fb:
-        cPickle.dump(obj, fb)
-
-def get_ret(reg, s, gn=1):
-    m = reg.search(s)
-    if m:
-        return m.group(gn)
-
-def get_dateobj_from_str(s, fmt=datetime_fmt):
-    if isinstance(s, str):
-        ret = datetime.strptime(s, fmt)
-    elif isinstance(s, type(datetime.now())):
-        ret = s
-    else:
-        raise TypeError("datetime type error")
-    return ret
-
-def parse_line(line, pat=[]):
-    ret = []
-    if len(pat) == 1:
-        ret = pat[0].findall(line)
-    else:
-        for pa in pat:
-            r = get_ret(pa, line)
-            ret.append(r)
-    return ret
-
-def datetime_to_day(date, fmt=datetime_fmt):
-    date_obj = get_dateobj_from_str(date)
-    return date_obj.strftime(day_fmt)
-
-def parse_watu(fobj):
-    '''
-    挖图
-    Get Reward --> 挖图成功
-    战斗失败   --> 挖图失败
-    '''
-    def _parse(line, s='fail'):
-        parts = parse_line(line, [brackets_pat])
-        if len(parts) < 2 or (not all(parts)):
-            print '[ERR LINE]', line
-            return None
-        date = parts[0]
-        uid = parts[1]
-        day = datetime_to_day(date)
-
-        WATU.setdefault(day, {})
-        WATU[day].setdefault(uid, {})
-        WATU[day][uid].setdefault('succ', 0)
-        WATU[day][uid].setdefault('fail', 0)
-
-        WATU[day][uid][s] += 1
-        return WATU[day][uid][s]
-
-    for line in fobj:
-        if watu_fail_pat.search(line):
-            _parse(line, 'fail')
-
-        elif watu_succ_pat.search(line):
-            _parse(line, 'succ')
-
-def parse_datu(fobj):
-    '''
-    打图
-    Complete   --> 成功
-    Fail       --> 失败
-    '''
-
-    def _parse(line, s='fail'):
-
-        parts = parse_line(line, [date_pat, mission_uid_pat])
-        if len(parts) < 2 or (not all(parts)):
-            print '[ERR LINE]', line
-            return None
-        date = parts[0]
-        uid = parts[1]
-        day = datetime_to_day(date)
-
-        DATU.setdefault(day, {})
-        DATU[day].setdefault(uid, {})
-        #DATU[day][uid].setdefault(s, 0)
-        DATU[day][uid].setdefault('fail', 0)
-        DATU[day][uid].setdefault('succ', 0)
-
-        DATU[day][uid][s] += 1
-        return DATU[day][uid][s]
-
-    for line in fobj:
-        if datu_fail_pat.search(line):
-            _parse(line, 'fail')
-
-        elif datu_succ_pat.search(line):
-            _parse(line, 'succ')
-
-def parse_login(fobj):
-    '''
-    统计 uid 登录次数,最后一次 ip
-    [2012-04-26 09:04:40] uid enter game vfd=259,ums=fdg1204,uid=1067015
-    [2012-04-26 09:04:39] client use loginkey ok vfd=259,ums=0,key=:1036:fdg1204:1919133540226506612:2012-04-26 09:04:38:3555,ip=113.95.229.13
-    '''
-    for line in fobj:
-        if 'client use loginkey ok vfd=' in line:
-            # 统计 最后一次 ip
-            date = get_ret(date_pat, line)
-            #vfd = get_ret(vfd_pat, line)
-            key = get_ret(key_pat, line)
-            ip = get_ret(ip_pat, line)
-            if not all([date, key, ip]):
-                print '[ERR LINE]', line
-                continue
-
-            date_obj = get_dateobj_from_str(date)
-            day = date_obj.strftime(day_fmt)
-
-            key_parts = key.split(':')
-            # ums 只能从key的字段里获取
-            ums = key_parts[2]
-
-            # 统计 最后一次 ip
-            if day not in ums_login:
-                ums_login[day] = {ums: ip}
-            else:
-                ums_login[day][ums] = ip
-
-        elif 'uid enter game vfd=' in line:
-            date = get_ret(date_pat, line)
-            ums = get_ret(ums_pat, line)
-            uid = get_ret(uid_pat, line)
-            if not all([date, ums, uid]):
-                print '[ERR LINE]', line
-
-            date_obj = get_dateobj_from_str(date)
-            day = date_obj.strftime(day_fmt)
-
-            UID_LOGIN.setdefault(day, {})
-            UID_LOGIN[day].setdefault(uid, 0)
-            UID_LOGIN[day][uid] += 1
-
-            UMS_UID_MAP[ums].add(uid)
-            UID_UMS_MAP.setdefault(uid, ums)
-
-def get_fobj_from_tarfile(tr):
-    tf = tarfile.open(tr)
-    fobj = tf.extractfile(tf.members[0])
-    return fobj
-
-def cmd_parse(args, opts):
-    """parse - parse log directory Usage: <server_id_path> <server_id>
-    """
-    logdir = args[1]
-    server_id = args[2]
-    import pdb;pdb.set_trace()
-    for fn in os.listdir(logdir):
-        fp = os.path.join(logdir, fn)
-        print 'parsing', fp
-        fobj = get_fobj_from_tarfile(fp)
-        if 'login_' in fn:
-            print 'enter login logic'
-            parse_login(fobj)
-        elif 'cangbao_' in fn:
-            print 'enter cangbao logic'
-            parse_watu(fobj)
-        elif 'mission_' in fn:
-            print 'enter mission logic'
-            parse_datu(fobj)
-        fobj.close()
-        del fobj
-
-    dump_obj('%s-uid_ums_map.pkl' % server_id, UID_UMS_MAP)
-    dump_obj('%s-ums_uid_map.pkl' % server_id, UMS_UID_MAP)
-    dump_obj('%s-watu.pkl'% server_id, WATU)
-    dump_obj('%s-datu.pkl' % server_id, DATU)
-    dump_obj('%s-uid_login.pkl' % server_id, UID_LOGIN)
-    dump_obj('%s-ums_login.pkl' % server_id, ums_login)
-
-def cmd_output(args, opts):
-    '''output log statistics'''
-    date_tuple = (
-            ('2012-05-03', '2012-05-09'),
-            ('2012-05-10', '2012-05-16'),
-            ('2012-05-17', '2012-05-23'),
-            ('2012-05-25', '2012-05-31'),
-            ('2012-06-01', '2012-06-07'),
-            ('2012-06-08', '2012-06-14'),
-   )
-
-    server_ids = ('1036', '1037')
-    for id in server_ids:
-        for start_date, end_date in date_tuple:
-            tongji(id, start_date, end_date)
-
-    for start_date, end_date in date_tuple[1:]:
-        tongji('1052', start_date, end_date)
-
-def cmd_help(args, opts):
-    """help - list available commands"""
-
-    print "Available commands:"
-    for _, func in sorted(get_commands().items()):
-        print "   ", func.__doc__
-
-def get_commands():
-    return {'help': cmd_help,
-            'parse': cmd_parse,
-            'output': cmd_output}
-
-def parse_opts():
-    usage = "%prog [options] <command> [arg] ..."
-    description = (u"Log parse, statistics too. Use `%prog help`"
-        "to see the list of available commands.")
-    op = optparse.OptionParser(usage=usage, description=description)
-    opts, args = op.parse_args()
-    if not args:
-        op.print_help()
-        sys.exit(2)
-    cmdname, cmdargs, opts = args[0], args[1:], opts
-    commands = get_commands()
-    if cmdname not in commands:
-        print >> sys.stdout, "Unknown command: %s\n\n" % cmdname
-        cmd_help(None, None)
-        sys.exit(1)
-    return commands[cmdname], cmdargs, opts
-
-def main():
-    cmd, args, opts = parse_opts()
-    try:
-        cmd(args, opts)
-    except IndexError:
-        print cmd.__doc__
-
-if __name__ == '__main__':
-    main()

File py/sa/parse_log.py

 import sys
 import tarfile
 import cPickle
+import optparse
 from datetime import datetime, timedelta
 from collections import defaultdict
 from pprint import pprint
 
-fail_text = u'挖图失败'.encode('gb18030')
+fail_text = u'战斗失败'.encode('gb18030')
 
 watu_fail_pat = re.compile(fail_text)
 watu_succ_pat = re.compile(r'Get\s+reward')
-datu_fail_pat = re.compile(r'Fail')
-datu_succ_pat = re.compile(r'Complete')
+datu_fail_pat = re.compile(r'Fail:\d+\s+[\'"]?cg_cbt')
+datu_succ_pat = re.compile(r'Complete:\d+\s+[\'"]?cg_cbt')
 
 brackets_pat = re.compile(r'\[(.+?)\]')
 
 # ums --> uid 的映射
 UMS_UID_MAP = defaultdict(set)
 
-# 统计uid登录次数
+# 统计所有成功uid登录次数
 UID_LOGIN = {}
 # 统计最后一次 ums 的登录ip
 ums_login = {}
 DATU = {}
 
 def load_obj(fp):
-    print 'loading ', fp
+    # print 'loading ', fp
     return cPickle.load(file(fp))
 
 def tongji(server_id, start_date, end_date):
         for date in dates:
             dobj = get_dateobj_from_str(date, day_fmt)
             if dobj >= start_date:
-                if dobj <= end_date:
+                if dobj < (end_date + timedelta(1)):
                     continue
             del dct[date]
-        pprint( dct.keys())
         return dct
 
     def get_ip_by_uid(uid):
             uid_map[uid] = {'succ': succ, 'fail': fail}
         return succ_count, fail_count, uid_map
 
-    print 'write content to %s' % csv
-
     fobj = open(csv, 'wb')
 
     _uid_login = filter_by_date(uid_login, start_date, end_date)
+
     _watu = filter_by_date(watu, start_date, end_date)
     _datu = filter_by_date(datu, start_date, end_date)
 
     # 时间段内总的登录总数
     login_count = 0
     uids = set([])
+    # 统计有效的 uid 数量
     for date, item in _uid_login.iteritems():
         for k, v in item.iteritems():
             uids.add(k)
             login_count += v
 
+    print '%s %s --> %d' % (server_id, date_line, len(uids))
     watu_count = {'fail': 0, 'succ': 0}
     uid_watu_count = {}
     for date, item in _watu.iteritems():
     fobj = tf.extractfile(tf.members[0])
     return fobj
 
-def main():
-    logdir = sys.argv[1]
-    server_id = sys.argv[2]
+def cmd_parse(args, opts):
+    """parse - parse log directory Usage: <server_id_path> <server_id>
+    """
+    logdir = args[1]
+    server_id = args[2]
+    import pdb;pdb.set_trace()
     for fn in os.listdir(logdir):
         fp = os.path.join(logdir, fn)
         print 'parsing', fp
     dump_obj('%s-uid_login.pkl' % server_id, UID_LOGIN)
     dump_obj('%s-ums_login.pkl' % server_id, ums_login)
 
-def output():
-    date_tuple = (('2012-05-03', '2012-05-09'),
-                  ('2012-05-25', '2012-05-31'),
-                  ('2012-06-01', '2012-06-07'),
-                  )
+def cmd_output(args, opts):
+    '''output log statistics'''
+    date_tuple = (
+            ('2012-05-03', '2012-05-09'),
+            ('2012-05-10', '2012-05-16'),
+            ('2012-05-17', '2012-05-23'),
+            ('2012-05-25', '2012-05-31'),
+            ('2012-06-01', '2012-06-07'),
+            ('2012-06-08', '2012-06-14'),
+   )
 
     server_ids = ('1036', '1037')
     for id in server_ids:
         for start_date, end_date in date_tuple:
-            output_result(id, start_date, end_date)
+            tongji(id, start_date, end_date)
 
     for start_date, end_date in date_tuple[1:]:
-        output_result('1052', start_date, end_date)
+        tongji('1052', start_date, end_date)
+
+def cmd_help(args, opts):
+    """help - list available commands"""
+
+    print "Available commands:"
+    for _, func in sorted(get_commands().items()):
+        print "   ", func.__doc__
+
+def get_commands():
+    return {'help': cmd_help,
+            'parse': cmd_parse,
+            'output': cmd_output}
+
+def parse_opts():
+    usage = "%prog [options] <command> [arg] ..."
+    description = (u"Log parse, statistics too. Use `%prog help`"
+        "to see the list of available commands.")
+    op = optparse.OptionParser(usage=usage, description=description)
+    opts, args = op.parse_args()
+    if not args:
+        op.print_help()
+        sys.exit(2)
+    cmdname, cmdargs, opts = args[0], args[1:], opts
+    commands = get_commands()
+    if cmdname not in commands:
+        print >> sys.stdout, "Unknown command: %s\n\n" % cmdname
+        cmd_help(None, None)
+        sys.exit(1)
+    return commands[cmdname], cmdargs, opts
+
+def main():
+    cmd, args, opts = parse_opts()
+    try:
+        cmd(args, opts)
+    except IndexError:
+        print cmd.__doc__
 
 if __name__ == '__main__':
     main()