Commits

yang xiaoyong  committed 0c7e10d Draft

aa

  • Participants
  • Parent commits b0be332

Comments (0)

Files changed (1)

File py/sa/parse_log.py

 datu_succ_pat = re.compile(r'Complete:\d+\s+[\'"]?cg_cbt')
 
 brackets_pat = re.compile(r'\[(.+?)\]')
+# Tar archive extensions: ".tar" or ".tgz", optionally followed by ".gz"
+# and/or a ".bz"/".bz<digit>" suffix.  The previous pattern used a character
+# class ("[(ar)|(gz)]+") where an alternation was intended, so it also matched
+# unrelated extensions such as ".tz" or ".tag".  Group numbering is unchanged.
+tarfile_pat = re.compile(r'\.(?:tar|tgz)(\.gz)?(\.bz\d?)?')
 
 ums_pat = re.compile(r'ums=(.+?),')
 uid_pat = re.compile(r'uid=(\d+)')
 # ums --> uid 的映射
 UMS_UID_MAP = defaultdict(set)
 
-# 统计所有成功uid登录次数
+# 所有 UID 每天的登录信息 {'day': {'uid': 'login_counts'}}
 UID_LOGIN = {}
-# 统计最后一次 ums 的登录ip
-ums_login = {}
+
+# Per-day login info for every UMS. All login IPs of a day are kept in log order, so the last element is the most recent login IP: {'day': {'ums': ['ip1', 'ip2']}}
+UMS_LOGIN = {}
+
+# mini 客户端每天登录信息
+MINICLIENT_LOGIN = {}
 
 # 挖图信息
-WATU = {}
+WA_TU = {}
 # 打图信息
-DATU = {}
+DA_TU = {}
+
+def li2line(x):
+    '''Format the items of ``x`` as one CSV row: each item wrapped in double quotes, joined by commas.'''
+    quoted = ['"%s"' % item for item in x]
+    return ','.join(quoted)
 
 def load_obj(fp):
     '''Unpickle and return the object stored in the file at path ``fp``.
 
     The file handle is always closed, even when unpickling fails.
     '''
     # NOTE(review): cPickle.load can execute arbitrary code from the file;
     # only point this at pickle files produced by a trusted source.
     fobj = open(fp)
     try:
         return cPickle.load(fobj)
     finally:
         fobj.close()
 
-def tongji(server_id, start_date, end_date):
+def tongji_login(server_id):
+    '''
+    Write UMS login summaries for one server, split into two periods.
+
+    Loads ``<server_id>-ums_login.pkl`` (used here as
+    ``{day: {ums: [ip, ...]}}``), partitions its days into those before
+    2010-03-01 and those on or after it, and writes one GB18030-encoded CSV
+    per period:
+
+    - ``1<server_id>-ums_login.csv``: days before March 2010
+    - ``2<server_id>-ums_login.csv``: days from March 2010 onwards
+
+    Planned statistics (only the total login count is produced so far):
+    1. Mini-client logins, counted per UMS, at most once per UMS per day.
+    2. Normal-client logins, counted per UMS, at most once per UMS per day.
+    3. Share of mini-client logins per day.
+
+    server_id -- server identifier used as the prefix of the pickle/CSV names.
+    '''
+    ums_login = load_obj('%s-ums_login.pkl' % server_id)
+    csv_name = '%s-ums_login.csv' % server_id
+
+    # Partition the recorded days around the 2010-03-01 cut-off.
+    march = get_dateobj_from_str('2010-03-01', day_fmt)
+    before_march = []
+    after_march = []
+    for day in sorted(ums_login):
+        if get_dateobj_from_str(day, day_fmt) < march:
+            before_march.append(day)
+        else:
+            after_march.append(day)
+
+    header_summary = [s.encode('gb18030') for s in [u'日期', u'SERVER_ID', u'总登录数',
+                        u'微端登录数', u'普通版登录数', u'微端登录比例']]
+    header_detail = [s.encode('gb18030') for s in [u'日期', u'微端登录数', u'正常版登录数', u'微端比例']]
+
+    for index, period in enumerate([before_march, after_march]):
+        # Period number: 1 = before March, 2 = March onwards.  It prefixes the
+        # file name and fills the first summary column.
+        n = index + 1
+        # A UMS counts once per day, so a day's logins equal its number of
+        # distinct UMS keys; the period total is the sum over its days.
+        sum_login_counts = sum(len(ums_login[day]) for day in period)
+
+        fobj = file(str(n) + csv_name, 'wb')
+        try:
+            fobj.write(li2line(header_summary) + '\n')
+            fobj.write(li2line([n, server_id, sum_login_counts]) + '\n')
+            fobj.write(li2line(header_detail) + '\n')
+            # TODO: fill in the mini-client / normal-client summary columns
+            # and the per-day detail rows.  They need
+            # <server_id>-miniclient_login.pkl and <server_id>-uid_ums_map.pkl,
+            # whose exact layout should be confirmed first.
+        finally:
+            fobj.close()
+
+def tongji_cangbao(server_id, start_date, end_date):
     uid_ums_map = load_obj('%s-uid_ums_map.pkl' % server_id)
     watu = load_obj('%s-watu.pkl'% server_id)
     datu = load_obj('%s-datu.pkl' % server_id)
             days.reverse()
             for day in days:
                 if ums in ums_login[day]:
-                    ip = ums_login[day][ums]
+                    # 取最后一次登录IP
+                    ip = ums_login[day][ums][-1]
         return ip
 
     def parse_cangbao(dct):
     for key in ['fail', 'succ']:
         cangbao_count += datu_count[key] + watu_count[key]
 
-    li2line = lambda x: ','.join(['"%s"' % l for l in x])
 
     fobj.write(u'日期,登录UID数,藏宝总数,挖图总数,打图总数\n'.encode('gb18030'))
     sum_line = [date_line, login_count, cangbao_count, watu_count, datu_count]
         uid = parts[1]
         day = datetime_to_day(date)
 
-        WATU.setdefault(day, {})
-        WATU[day].setdefault(uid, {})
-        WATU[day][uid].setdefault('succ', 0)
-        WATU[day][uid].setdefault('fail', 0)
+        WA_TU.setdefault(day, {})
+        WA_TU[day].setdefault(uid, {})
+        WA_TU[day][uid].setdefault('succ', 0)
+        WA_TU[day][uid].setdefault('fail', 0)
 
-        WATU[day][uid][s] += 1
-        return WATU[day][uid][s]
+        WA_TU[day][uid][s] += 1
+        return WA_TU[day][uid][s]
 
     for line in fobj:
         if watu_fail_pat.search(line):
         uid = parts[1]
         day = datetime_to_day(date)
 
-        DATU.setdefault(day, {})
-        DATU[day].setdefault(uid, {})
-        #DATU[day][uid].setdefault(s, 0)
-        DATU[day][uid].setdefault('fail', 0)
-        DATU[day][uid].setdefault('succ', 0)
+        DA_TU.setdefault(day, {})
+        DA_TU[day].setdefault(uid, {})
+        #DA_TU[day][uid].setdefault(s, 0)
+        DA_TU[day][uid].setdefault('fail', 0)
+        DA_TU[day][uid].setdefault('succ', 0)
 
-        DATU[day][uid][s] += 1
-        return DATU[day][uid][s]
+        DA_TU[day][uid][s] += 1
+        return DA_TU[day][uid][s]
 
     for line in fobj:
         if datu_fail_pat.search(line):
         elif datu_succ_pat.search(line):
             _parse(line, 'succ')
 
+def parse_minilog(fobj):
+    '''Record mini-client logins from a log stream into MINICLIENT_LOGIN.
+
+    Each line is split with ``parse_line`` using ``brackets_pat``.  Lines that
+    yield fewer than three fields, or any empty field, are printed with an
+    ``[ERR LINE]`` prefix and skipped.  For valid lines the first three fields
+    are taken as date, uid and IP, and the IP is appended to
+    ``MINICLIENT_LOGIN[day][uid]``.
+    '''
+
+    for line in fobj:
+        fields = parse_line(line, brackets_pat)
+        if all(fields) and len(fields) >= 3:
+            date, uid, ip = fields[0], fields[1], fields[2]
+            day = datetime_to_day(date)
+            # Keep every login IP seen for this uid on this day.
+            day_logins = MINICLIENT_LOGIN.setdefault(day, defaultdict(list))
+            day_logins[uid].append(ip)
+        else:
+            print '[ERR LINE]', line
+
 def parse_login(fobj):
     '''
     统计 uid 登录次数,最后一次 ip
             # ums 只能从key的字段里获取
             ums = key_parts[2]
 
-            # 统计 最后一次 ip
-            if day not in ums_login:
-                ums_login[day] = {ums: ip}
-            else:
-                ums_login[day][ums] = ip
+            UMS_LOGIN.setdefault(day, defaultdict(list))
+            # 记录所有登录IP
+            UMS_LOGIN[day][ums].append(ip)
 
         elif 'uid enter game vfd=' in line:
             date = get_ret(date_pat, line)
     """
     logdir = args[1]
     server_id = args[2]
-    import pdb;pdb.set_trace()
     for fn in os.listdir(logdir):
         fp = os.path.join(logdir, fn)
         print 'parsing', fp
-        fobj = get_fobj_from_tarfile(fp)
+
+        if tarfile_pat.search(fp):
+            fobj = get_fobj_from_tarfile(fp)
+        else:
+            fobj = file(fp)
+
         if 'login_' in fn:
             print 'enter login logic'
             parse_login(fobj)
         elif 'mission_' in fn:
             print 'enter mission logic'
             parse_datu(fobj)
+        elif 'mini' in fn:
+            print 'entering miniclient log logind'
+            parse_minilog(fobj)
         fobj.close()
         del fobj
 
     dump_obj('%s-uid_ums_map.pkl' % server_id, UID_UMS_MAP)
     dump_obj('%s-ums_uid_map.pkl' % server_id, UMS_UID_MAP)
-    dump_obj('%s-watu.pkl'% server_id, WATU)
-    dump_obj('%s-datu.pkl' % server_id, DATU)
+    dump_obj('%s-watu.pkl'% server_id, WA_TU)
+    dump_obj('%s-datu.pkl' % server_id, DA_TU)
     dump_obj('%s-uid_login.pkl' % server_id, UID_LOGIN)
-    dump_obj('%s-ums_login.pkl' % server_id, ums_login)
+    dump_obj('%s-ums_login.pkl' % server_id, UMS_LOGIN)
+    dump_obj('%s-miniclient_login.pkl' % server_id, MINICLIENT_LOGIN)
 
-def cmd_output(args, opts):
+def cmd_tongji_cangbao(args, opts):
     '''output log statistics'''
     date_tuple = (
             ('2012-05-03', '2012-05-09'),
     server_ids = ('1036', '1037')
     for id in server_ids:
         for start_date, end_date in date_tuple:
-            tongji(id, start_date, end_date)
+            tongji_cangbao(id, start_date, end_date)
 
     for start_date, end_date in date_tuple[1:]:
-        tongji('1052', start_date, end_date)
+        tongji_cangbao('1052', start_date, end_date)
 
 def cmd_help(args, opts):
     """help - list available commands"""
 def get_commands():
     return {'help': cmd_help,
             'parse': cmd_parse,
-            'output': cmd_output}
+            'tongji_cangbao': cmd_tongji_cangbao}
 
 def parse_opts():
     usage = "%prog [options] <command> [arg] ..."