Source

woocode / py / sa / statistics / sum.py

Full commit
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
import tarfile
import re
import sys
import os
import datetime
import sqlite3
from collections import defaultdict

from utils import dump_obj
from pprint import pprint

dbfile = 'data.sqlite3'
conn = sqlite3.connect(dbfile)
cur = conn.cursor()

#if os.path.exists(dbfile):
    #os.remove(dbfile)

def init_db():
    cur.execute('''CREATE TABLE `all_login` (
            id  INTEGER PRIMARY KEY AUTOINCREMENT,
            uid INTEGER,
            date TEXT,
            ums TEXT)''')
    cur.execute('''CREATE TABLE `mini_login` (
            id  INTEGER PRIMARY KEY AUTOINCREMENT,
            uid INTEGER,
            date TEXT,
            ums TEXT
            )''')
    cur.execute('''CREATE TABLE `nor_login` (
            id  INTEGER PRIMARY KEY AUTOINCREMENT,
            uid INTEGER,
            date TEXT,
            ums TEXT
            )''')
    cur.execute('''CREATE TABLE `statistics` (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            date TEXT UNIQUE,
            nor_ums TEXT,
            mini_ums TEXT,
            mini_percent TEXT)''')
    conn.commit()

def get_logobj_from_tarfile(filename):
    '''从tar文件里获取日志文件对象'''

    tf = tarfile.open(filename)
    for member in tf.members:
        # if member.name == 'mini_stat_access.run':
        yield LogLineGenerator(tf.extractfile(member))

class LogLineGenerator(object):
    def __init__(self, fobj, log_regpat=None):
        self.log_regpat = log_regpat
        if isinstance(fobj, (file, tarfile.ExFileObject)):
            self.fobj = fobj
        elif isinstance(fobj, str):
            self.fobj = open(fobj)
        else:
            raise TypeError('fobj only support file object or file path. Got: %r' % type(fobj))

    def get_loglines(self):
        for line in self.fobj:
            line = line.strip()
            yield line

def get_ret(pat, s):
    m = pat.search(s)
    if m:
        return m.group(1)

def get_logline_from_files(files, date_pat):
    start_day = datetime.datetime(2012, 5, 1)
    end_day = datetime.datetime(2012, 6, 13)
    for logfile in files:
        print 'parsing logfile %s' % logfile
        for logobj in get_logobj_from_tarfile(logfile):
            n = 1
            for line in logobj.get_loglines():
                sys.stdout.write('line: %d\r' % n)
                sys.stdout.flush()
                n += 1
                #dte = date_pat.search(line).group(1)
                dte = get_ret(date_pat, line)
                dte_obj = datetime.datetime.strptime(dte, '%Y-%m-%d %H:%M:%S')
                if dte_obj < start_day or dte_obj > end_day:
                    continue
                yield line

def main():

    tar_dir = sys.argv[1]
    all_login_logfiles = [os.path.join(tar_dir, f) for f in os.listdir(tar_dir)
            if f.find('login_') != -1]
    mini_login_logfiles = [os.path.join(tar_dir, f) for f in os.listdir(tar_dir)
            if f.find('mini_client') != -1]
    # 所有的登录信息
    #all_login_info = defaultdict(list)
    #mini_login_info = defaultdict(list)
    date_pat = re.compile(r'\[([-\d:\s]+)\]')
    ums_pat = re.compile(r'ums=(\w+?),')
    uid_pat = re.compile(r'uid=(\d+)')

    for line in get_logline_from_files(all_login_logfiles, date_pat):
        if 'uid enter game' not in line:
            continue

        dte = get_ret(date_pat, line)
        day = datetime.datetime.strptime(dte, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d')
        ums = get_ret(ums_pat, line)
        uid = get_ret(uid_pat, line)
        cur.execute('SELECT uid from `all_login` WHERE date=:date and ums=:ums and uid=:uid',
                {'date': day, 'ums': ums, 'uid': uid})
        r = cur.fetchone()
        if r:
            continue
        cur.execute('INSERT INTO `all_login` (date, ums, uid) values (?, ?, ?)', (day, ums, uid))
        conn.commit()
    print 'all login info finished'

    mini_pat = re.compile(r'\[(.+?)\]')
    for line in get_logline_from_files(mini_login_logfiles, date_pat):
        parts = mini_pat.findall(line)
        uid = parts[1]
        dte = parts[0]
        day = datetime.datetime.strptime(dte, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d')
        #dte_obj = datetime.datetime.strptime(dte, '%Y-%m-%d %H:%M:%S')
        cur.execute('SELECT uid from `mini_login` WHERE uid=? and date=?', (uid, day))
        r = cur.fetchone()
        if r:
            continue
        r = cur.fetchone()
        cur.execute('SELECT ums from `all_login` WHERE uid=?', (uid,))
        r = cur.fetchone()
        if r:
            ums = r[0]
            cur.execute('SELECT ums from `mini_login` WHERE date=:date and uid=:uid', {'date': day, 'uid': uid})
            r = cur.fetchone()
            if r:
                continue
            cur.execute('INSERT INTO `mini_login` (date, ums, uid) values (?, ?, ?)', (day, ums, uid))
            #conn.commit()
        else:
            cur.execute('INSERT INTO `mini_login` (date, ums, uid) values (?, ?, ?)', (day, '', uid))
        conn.commit()
    print 'mini login info finished'

    cur.execute('SELECT uid, date, ums from `all_login`')
    datas = cur.fetchall()
    for data in datas:
        cur.execute('SELECT uid, date from `mini_login` WHERE uid=? and date=?', (data[0], data[1]))
        r = cur.fetchone()
        if not r:
            cur.execute('INSERT INTO `nor_login` (uid, date, ums) VALUES (?, ?, ?)', data)
            conn.commit()


if __name__ == "__main__":
    #init_db()
    main()
    conn.close()