Commits

yang xiaoyong committed cb3e7bf Draft

能按周统计数据

  • Participants
  • Parent commits a7a5f49

Comments (0)

Files changed (2)

File do_statistics.py

 def load_obj(pkl):
     """Load and return the object pickled in the file at path ``pkl``.

     The file is opened in binary mode and is closed before returning, even
     if unpickling raises, so no open handle is left behind.

     NOTE(review): unpickling can execute arbitrary code. Presumably the
     .pkl files read here are produced by this project's own log scanner;
     confirm they never come from an untrusted source.
     """
     with open(pkl, 'rb') as fobj:
         return cPickle.load(fobj)
 
def calc_percent(num, all):
    """Return ``num`` as a whole-number percentage of ``all``, e.g. '25%'.

    Both arguments are converted with int(), so numeric strings are
    accepted. The result is truncated to a whole percent.

    num -- the part being measured.
    all -- the total (the name shadows the builtin but is kept so existing
           callers are unaffected).

    Returns '0%' when the total is zero instead of raising
    ZeroDivisionError.
    """
    total = int(all)
    if total == 0:
        return '0%'
    # Integer arithmetic is exact; going through a float first can land just
    # below the true value (29 of 100 became 28.999...) and then be
    # truncated to the wrong whole percent.
    return '%d%%' % (int(num) * 100 // total)
+
 def load_obj_new(pkl):
     print 'loading %s' % pkl
     obj = cPickle.load(file(pkl, 'rb'))
     tt = time.strptime(s, '%Y-%m-%d')
     return datetime.date(*tt[:3])
 
def login_stat_by_week(server_id, week_num):
    """Count mini-client and normal-client logins of one server for one week.

    Reads three pickles for the server from the current directory:
    '<server_id>-miniclient_login.pkl', '<server_id>-uid_ums_map.pkl' and
    '<server_id>-uid_login.pkl'.

    server_id -- server id used as the file-name prefix.
    week_num  -- week of the year as produced by strftime('%W'); may be an
                 int or a string, zero-padded or not ('5', '05' and 5 are
                 equivalent). A non-numeric value raises ValueError.

    Returns a tuple (mini_count, norm_count). Each count is the sum, over
    every date in the requested week, of the number of distinct ums values
    seen on that date, so an ums active on several days is counted once per
    day.

    NOTE: only the week number is compared, not the year, so data from the
    same week number of different years is counted together.
    """
    mini_login_stat = load_obj_new('%s-miniclient_login.pkl' % server_id)
    uid_ums_map = load_obj_new('%s-uid_ums_map.pkl' % server_id)
    uid_login = load_obj_new('%s-uid_login.pkl' % server_id)

    # Every uid that ever appears in the mini-client login data.
    mini_uids = set()
    for daily_uids in mini_login_stat.values():
        mini_uids.update(daily_uids.keys())

    # Compare week numbers as integers so that padding differences between
    # the caller's value and strftime('%W') cannot cause a silent mismatch.
    target_week = int(week_num)

    mini_ums_by_date = defaultdict(set)
    norm_ums_by_date = defaultdict(set)
    for date, items in uid_login.items():
        if int(str2date(date).strftime('%W')) != target_week:
            continue
        for uid in items:
            # A uid found in the mini-client uid set has its ums counted as
            # a mini-client login; any other uid counts as a normal login.
            ums = uid_ums_map.get(uid, 'unknown_uid-%s' % uid)
            if uid in mini_uids:
                mini_ums_by_date[date].add(ums)
            else:
                norm_ums_by_date[date].add(ums)

    mini_ums_login_counts = sum(len(v) for v in mini_ums_by_date.values())
    norm_ums_login_counts = sum(len(v) for v in norm_ums_by_date.values())
    # A zero mini-client count is a valid result (e.g. a week with no data);
    # it must not stop the run in an interactive debugger.
    return (mini_ums_login_counts, norm_ums_login_counts)
+
 
 # 统计每个服的微端/正常版登录数量
 def login_stat(server_id, start_date, end_date):
     return (server_id, (mini_ums_login_counts, norm_ums_login_counts))
 
 def stat(server_ids):
     """Write login totals for the three weeks before the current one to a CSV.

     For each of those weeks, login_stat_by_week is called once per server
     and the per-server (mini, normal) counts are summed. One row per week
     is written to '<server ids joined by "-">-result.csv' in the current
     directory: the week number, the mini-client count followed by its share
     of all logins, and the normal-client count.

     server_ids -- sequence of server id strings.
     """
     now = datetime.datetime.now()
     # Step back by whole calendar weeks instead of subtracting from the week
     # number. This keeps the zero-padded strftime('%W') form that the
     # per-day week numbers in login_stat_by_week are derived from, and it
     # cannot yield a negative week number at the start of a year.
     latest_weeks = [(now - datetime.timedelta(weeks=back)).strftime('%W')
                     for back in range(1, 4)]

     # The with-block closes the file even if a per-server lookup fails.
     with open('%s-result.csv' % ('-'.join(server_ids)), 'wb') as fobj:
         fobj.write('weeknum,mini_ums_login (%),norm_ums_login\n')
         for week in latest_weeks:
             _stat = [login_stat_by_week(sid, week) for sid in server_ids]
             print(_stat)
             mini_ums_login_counts = sum(pair[0] for pair in _stat)
             norm_ums_login_counts = sum(pair[1] for pair in _stat)
             perc = calc_percent(mini_ums_login_counts,
                                 mini_ums_login_counts + norm_ums_login_counts)
             fobj.write('"%s","%s %s","%s"\n' % (
                 week, mini_ums_login_counts, perc, norm_ums_login_counts))
 
 def main():
     files = os.listdir('.')
     server_ids = set()
     server_ids.update([id_pat.search(f).group(1) for f in files if id_pat.search(f)])
 
-    min_date = str2date('1999-01-01')
-    max_date = str2date('2012-12-30')
-    mid_date = str2date('2012-03-01')
+    #min_date = str2date('1999-01-01')
+    #max_date = str2date('2012-12-30')
+    #mid_date = str2date('2012-03-01')
 
     new_server_ids = ['1054', '1053', '1052', '1051', '1050']
     old_server_ids = ['1049', '1048', '1037', '1036', '1034']
 用于日志分析和统计
+
+日志分析
+- 扫描日志
+- 生成pickle对象
+- 分析之
+- 统计