Source

logparser / sum_mini.py

Full commit
#!/usr/bin/env python
# -*- encoding:utf-8 -*-

import sys
import re
import os
import hashlib
from datetime import datetime

from collections import defaultdict

from utils import get_files, dump_obj, get_lines, \
        get_fobj_from_tarfile, get_ret

'''
Collect, per server, the UIDs that logged in with the normal (non-mini) client.

UMS_UID_MAP = {ums: set([uid, ...])}
'''

# ums -> set of uids; filled by parse_login_log from 'uid enter game' lines.
UMS_UID_MAP = defaultdict(set)
# uid -> ums; parse_login_log uses setdefault, so the first ums seen wins.
UID_UMS_MAP = {}

# Text after 'ums=' up to (not including) the next comma.
ums_pat = re.compile(r'ums=(.+?),')
# Run of digits after 'uid='.
uid_pat = re.compile(r'uid=(\d+)')
# Run of digits after 'vfd=' that is terminated by a comma.
vfd_pat = re.compile(r'vfd=(\d+?),')
# Bracketed run made only of digits, dashes, colons and whitespace
# (the timestamp; parsed with datetime_fmt by the log parsers).
date_pat = re.compile(r'\[([-\d:\s]+)\]')
# Run of digits and dots after 'ip=' (dotted-decimal addresses only).
ip_pat = re.compile(r'ip=([\d\.]+)')
# Text after 'key=' up to the next comma; parse_login_log splits it on ':'.
key_pat = re.compile(r'key=(.+?),')
# Any bracketed field; parse_minclient_log collects all of them per line.
miniclient_pat = re.compile(r'\[(.+?)\]')
# strptime/strftime formats. hour_fmt and minute_fmt are not referenced
# anywhere else in the visible part of this file.
datetime_fmt = '%Y-%m-%d %H:%M:%S'
day_fmt = '%Y-%m-%d'
hour_fmt = '%Y-%m-%d %H'
minute_fmt = '%Y-%m-%d %H:%M'

def get_dateobj_from_str(s, fmt=datetime_fmt):
    """Parse the timestamp string *s* into a ``datetime`` object.

    *fmt* is a ``strptime`` format and defaults to the module-level
    ``datetime_fmt``.  ``datetime.strptime`` raises ``ValueError`` when *s*
    does not match *fmt*; that exception is propagated to the caller.
    """
    parsed = datetime.strptime(s, fmt)
    return parsed

# First run of digits in a file name that is directly followed by a dash.
# Compiled once here instead of on every call.
_SERVER_ID_PAT = re.compile(r'(\d+?)\-')


def get_server_id(path):
    """Extract the server id from the file name of a log archive.

    Only the base name of *path* is inspected (directories are ignored).
    The id is the first run of digits that is immediately followed by
    ``-``; it is returned as a string.

    Raises:
        ValueError: if the base name contains no such "digits-" sequence.
            (Previously this surfaced as an AttributeError on ``None``.)
    """
    bn = os.path.basename(path)
    m = _SERVER_ID_PAT.search(bn)
    if m is None:
        raise ValueError(
            'cannot find server id (digits followed by "-") in file name: %r'
            % bn)
    return m.group(1)

def hash_obj(s, size=512*1024):
    """Return the hexadecimal MD5 digest of *s*.

    *s* may be:

    * a byte string, hashed as-is;
    * a text (unicode) string, hashed as its UTF-8 encoding;
    * any object with a ``read(n)`` method (regular files, tarfile members,
      in-memory streams, ...), which is consumed in chunks of at most
      *size* so a large file is never loaded into memory at once.

    Raises:
        TypeError: for any other kind of object.  Such inputs used to be
            ignored silently, so they all produced the digest of empty
            data and collided with each other.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    m = hashlib.md5()
    if hasattr(s, 'read'):
        while True:
            c = s.read(size)
            if not c:
                break
            # Streams opened in text mode hand back text; md5 needs bytes.
            if isinstance(c, text_type):
                c = c.encode('utf-8')
            m.update(c)
    elif isinstance(s, text_type):
        m.update(s.encode('utf-8'))
    elif isinstance(s, (bytes, bytearray)):
        m.update(s)
    else:
        raise TypeError(
            'hash_obj() expects a string or a readable object, got %s'
            % type(s).__name__)
    return m.hexdigest()


def parse_login_log(logobj):
    """Collect per-day login records from the lines of a login log.

    Two kinds of line are used, every other line is ignored:

    * lines containing ``'uid enter game'`` contribute a uid;
    * lines containing ``'client use loginkey ok vfd='`` contribute an ip.
      For these the ums is the third ``:``-separated field of the
      ``key=`` value.

    Both kinds are filed under ``hash_obj(vfd + ums + day)``, so events
    sharing a vfd and ums on the same day end up in the same record.

    Args:
        logobj: iterable of log lines.

    Returns:
        defaultdict of the form::

            {'YYYY-MM-DD': {hash_key: {'vfd': vfd,
                                       'ums': ums,
                                       'uids': [uid, ...],
                                       'ips': [ip, ...]}}}

    Side effects:
        Updates the module-level ``UMS_UID_MAP`` (ums -> set of uids) and
        ``UID_UMS_MAP`` (uid -> first ums seen for it), and writes a
        progress counter to stdout for every line read.

    Relevant lines that cannot be parsed (missing or malformed timestamp,
    missing vfd/ums/uid, ``key=`` with fewer than three fields) are counted
    and skipped; the count is printed at the end when it is non-zero.
    """
    # NOTE(review): get_ret comes from utils and is assumed to return a
    # falsy value when the pattern does not match -- confirm there.
    logged_info = defaultdict(dict)
    error_lines = 0

    for count, line in enumerate(logobj, 1):
        sys.stdout.write('parse line :%d\r' % count)
        sys.stdout.flush()

        is_enter = 'uid enter game' in line
        if not is_enter and 'client use loginkey ok vfd=' not in line:
            continue

        date = get_ret(date_pat, line)
        vfd = get_ret(vfd_pat, line)
        if not date or not vfd:
            error_lines += 1
            continue

        uid = None
        if is_enter:
            uid = get_ret(uid_pat, line)
            ums = get_ret(ums_pat, line)
            if not uid or not ums:
                error_lines += 1
                continue
        else:
            key = get_ret(key_pat, line)
            key_fields = key.split(':') if key else []
            if len(key_fields) < 3:
                error_lines += 1
                continue
            ums = key_fields[2]

        try:
            day_str = get_dateobj_from_str(date).strftime(day_fmt)
        except ValueError:
            # Bracketed text that looked like a timestamp but is not one.
            error_lines += 1
            continue

        # One record per (vfd, ums, day); create it on first sight.
        hash_key = hash_obj(vfd + ums + day_str)
        record = logged_info[day_str].setdefault(
            hash_key, {'vfd': vfd, 'ums': ums, 'uids': [], 'ips': []})

        if is_enter:
            # The same vfd/ums pair may enter with several uids on one day,
            # so every uid is appended to the existing record.
            record['uids'].append(uid)
            UMS_UID_MAP[ums].add(uid)
            UID_UMS_MAP.setdefault(uid, ums)
        else:
            # The ip is stored exactly as get_ret returned it, even if
            # ip_pat did not match, so the login event itself is not lost.
            record['ips'].append(get_ret(ip_pat, line))

    if error_lines > 0:
        print('error lines: %d' % error_lines)
    return logged_info

def login_log(all_logfiles):
    """Parse every ``login.log`` found in the given archives and dump the results.

    For each path in *all_logfiles* the server id is taken from the file
    name, each object yielded by ``get_fobj_from_tarfile(path, 'login.log')``
    is run through ``get_lines`` and ``parse_login_log``, and the parsed
    result is appended to that server's list.

    Three objects are then handed to ``dump_obj``: the module-level
    ``UMS_UID_MAP`` and ``UID_UMS_MAP`` (both filled by ``parse_login_log``)
    and the ``{server_id: [parsed_log, ...]}`` mapping built here.
    """
    # NOTE(review): get_fobj_from_tarfile, get_lines and dump_obj live in
    # utils; the '.pkl' names suggest dump_obj pickles -- confirm there.
    per_server = {}
    for archive in all_logfiles:
        print('parse logfile: %s' % archive)
        sid = get_server_id(archive)
        for member in get_fobj_from_tarfile(archive, 'login.log'):
            parsed = parse_login_log(get_lines(member))
            per_server.setdefault(sid, []).append(parsed)

    outputs = (
        ('ums_uid_map.pkl', UMS_UID_MAP),
        ('uid_ums_map.pkl', UID_UMS_MAP),
        ('all_login.pkl', per_server),
    )
    for target, payload in outputs:
        dump_obj(target, payload)

def parse_minclient_log(logobj):
    """Collect, per day and per uid, the ips seen in a mini-client log.

    Every line is expected to carry at least four bracketed fields
    (``[...]``).  The first is the timestamp (``datetime_fmt``), the second
    the uid and the fourth the ip; the third is not used.

    Args:
        logobj: iterable of log lines.

    Returns:
        defaultdict of the form
        ``{'YYYY-MM-DD': {hash_obj(uid + day): [ip, ...]}}``.

    Lines with fewer than four bracketed fields or with an unparsable
    timestamp are counted and skipped; the count is printed at the end
    when it is non-zero.  A progress counter is written to stdout for
    every line read.
    """
    error_lines = 0
    logged_info = defaultdict(dict)

    for count, line in enumerate(logobj, 1):
        sys.stdout.write('parse line :%d\r' % count)
        sys.stdout.flush()

        # A single findall covers both "no bracketed field at all" and
        # "too few fields"; no separate search() pass is needed.
        parts = miniclient_pat.findall(line)
        if len(parts) < 4:
            error_lines += 1
            continue

        try:
            day_str = get_dateobj_from_str(parts[0]).strftime(day_fmt)
        except ValueError:
            # Only a bad timestamp is expected here; anything else
            # (including KeyboardInterrupt) must not be swallowed.
            error_lines += 1
            continue

        uid = parts[1]
        ip = parts[3]
        # One bucket per (uid, day).
        hash_key = hash_obj(uid + day_str)
        logged_info[day_str].setdefault(hash_key, []).append(ip)

    if error_lines > 0:
        print('error lines: %d' % error_lines)
    return logged_info

def parse_mini_logfiles(mini_logfiles):
    """Parse every ``mini_client.log`` in the given archives and dump the result.

    For each path in *mini_logfiles* the server id is taken from the file
    name, each object yielded by
    ``get_fobj_from_tarfile(path, 'mini_client.log')`` is run through
    ``get_lines`` and ``parse_minclient_log``, and the parsed result is
    appended to that server's list.  The resulting
    ``{server_id: [parsed_log, ...]}`` mapping is passed to ``dump_obj``
    under the name ``'mini_login.pkl'``.
    """
    # NOTE(review): get_fobj_from_tarfile, get_lines and dump_obj live in
    # utils; their exact behavior is not visible here -- confirm there.
    per_server = {}
    for archive in mini_logfiles:
        sid = get_server_id(archive)
        print('parse logfile: %s' % archive)
        for member in get_fobj_from_tarfile(archive, 'mini_client.log'):
            parsed = parse_minclient_log(get_lines(member))
            per_server.setdefault(sid, []).append(parsed)

    dump_obj('mini_login.pkl', per_server)

def main():
    """Command-line entry point: summarise the mini-client logs in a directory.

    Expects the log directory as the first command-line argument, collects
    the files that ``get_files`` returns for the ``'mini_client_'`` prefix
    and hands them to ``parse_mini_logfiles``.

    Exits with a usage message (instead of an IndexError traceback) when
    the directory argument is missing.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: sum_mini.py LOG_DIR')
    log_dir = sys.argv[1]
    # NOTE(review): only mini-client archives are processed here; the
    # 'login_' archives handled by login_log() are not wired in.
    mini_logfiles = get_files(log_dir, 'mini_client_')
    parse_mini_logfiles(mini_logfiles)

# Run the summary only when executed as a script, not when imported.
if __name__ == '__main__':
    main()