Source

woocode / py / report.py

Full commit
#!/usr/bin/env python
# -*- encoding:utf8 -*-

import os
import sys
import datetime
import csv
import re
import operator
from collections import defaultdict

from utils import zipfiles, sendmail
from config import CWD, MAIL_CONFIG, MAIL_LIST, day_fmt, to_gbk

# Root directory of the uploaded game logs; main() treats each entry directly
# below it as a host id.
UPLOAD_DIR = '/home/backups/uploads/game/'
# Snapshot of the entries under UPLOAD_DIR, taken once when the module loads.
HOST_IDS = os.listdir(UPLOAD_DIR)

# Reference dates, also fixed at import time: "now" and exactly 24 hours earlier.
today = datetime.datetime.today()
yesterday = today - datetime.timedelta(days=1)

class SumData(object):
    """Add up the second column of several two-column CSV files, keyed by the first column.

    Construction reads everything immediately: ``titles`` is filled from the
    first file and ``data`` (a ``defaultdict(int)``) accumulates the totals of
    every file.

    Parameters:
        csv_files: list of CSV paths.  An empty list yields an empty report.
        rules: optional mapping ``base_title -> rule`` used by ``as_csv`` to
            compute a third column for rows whose title is *not* a rule key:
              * callable rule: called with the row's total; when it returns a
                true value the third column is the row's total expressed as a
                percentage of ``data[base_title]``;
              * dict rule: the third column is ``rule.get('value', 0)``.
            Any other rule type makes ``as_csv`` raise ``TypeError``.
        has_header: when true, the first row of every file is a header; the
            header of the first file is stored as ``titles[0]``.

    NOTE: row titles (and their order) come from the first file only; keys that
    appear solely in later files are summed into ``data`` but never written by
    ``as_csv``.
    NOTE: when several rules match one row, the one visited last wins, and
    that order is the dict's iteration order.
    """

    def __init__(self, csv_files, rules=None, has_header=True):
        self.csv_files = csv_files
        self.has_header = has_header
        self.data = defaultdict(int)
        # A fresh dict per instance instead of one mutable default shared by all.
        self.rules = {} if rules is None else rules
        self.titles = []
        self.get_titles()
        self.sum_data()

    @staticmethod
    def _open_csv(path, mode):
        """Open *path* ('r' or 'w') in the mode the csv module needs on this interpreter."""
        if sys.version_info[0] >= 3:
            # Python 3: text mode, newline handling left to the csv module.
            return open(path, mode, newline='')
        # Python 2: the csv module requires binary mode.
        return open(path, mode + 'b')

    def get_titles(self):
        """Collect the header row (if any) and the row titles from the first file."""
        if not self.csv_files:
            # Nothing to read; leave ``titles`` empty instead of raising IndexError.
            return
        with self._open_csv(self.csv_files[0], 'r') as fb:
            csv_reader = csv.reader(fb)
            if self.has_header:
                header = next(csv_reader, None)
                if header is None:
                    # Completely empty file: no header, no titles.
                    return
                self.titles.append(header)
            for row in csv_reader:
                # Blank lines come back as empty lists; they carry no title.
                if row:
                    self.titles.append(row[0])

    def sum_data(self):
        """Accumulate ``int(row[1])`` into ``data[row[0]]`` for every row of every file."""
        for csv_file in self.csv_files:
            with self._open_csv(csv_file, 'r') as fb:
                csv_reader = csv.reader(fb)
                print('read %s' % csv_file)
                if self.has_header:
                    # Skip the header; tolerate an empty file.
                    next(csv_reader, None)
                for row in csv_reader:
                    if not row:
                        continue
                    self.data[row[0]] += int(row[1])

    def _warn_zero_division(self, title):
        """Report on stderr that the denominator for *title* is zero."""
        label = title
        if isinstance(label, bytes):
            # 'replace' keeps a diagnostic from crashing on undecodable bytes.
            label = label.decode('gb18030', 'replace')
        message = "ZeroDivisionError: %s\n" % label
        try:
            sys.stderr.write(message)
        except UnicodeError:
            # Python 2 with a piped/ASCII stderr cannot encode the text itself.
            sys.stderr.write(message.encode('utf-8'))

    def _rule_value(self, title):
        """Return the third-column value for *title* according to ``self.rules``."""
        value = 0
        total = self.data[title]
        for base_title, rule in self.rules.items():
            if callable(rule):
                if rule(total):
                    den = self.data[base_title]
                    if den == 0:
                        self._warn_zero_division(title)
                    else:
                        value = '%.2f%%' % (total * 1.0 / den * 100)
            elif isinstance(rule, dict):
                value = rule.get('value', 0)
            else:
                raise TypeError('rule only support dict, func type!')
        return value

    def as_csv(self, csv_file):
        '''Write the summed data to *csv_file*.

        The stored header row (if any) is written first, then one row per
        title: ``[title, total]`` for titles that are rule keys, and
        ``[title, total, rule_value]`` for all others.

        Raises TypeError if a rule is neither callable nor a dict.
        '''
        rules = self.rules
        titles = self.titles
        with self._open_csv(csv_file, 'w') as fb:
            writer = csv.writer(fb)

            # ``titles`` may be empty when there were no input files.
            if self.has_header and titles:
                writer.writerow(titles[0])
                titles = titles[1:]

            for title in titles:
                row = [title, self.data[title]]
                if title not in rules:
                    row.append(self._rule_value(title))
                writer.writerow(row)

class SumMoreCsv(object):
    """Combine the integer second column of several CSV files title by title.

    Each file is loaded into its own ``{row[0]: int(row[1])}`` dict at
    construction time; ``sum`` then folds the per-file values of every title
    with a function from the ``operator`` module.

    Parameters:
        csv_files: iterable of CSV paths.
        fields: optional container of titles to keep; ``None`` keeps all rows.
        has_header: when true, the first row of each file is skipped.

    Attributes:
        titles: titles in order of first appearance across the files.
        data: result of the most recent ``sum`` call (empty dict before that).
    """

    def __init__(self, csv_files, fields=None, has_header=True):
        self.fields = fields
        self.has_header = has_header
        # ``titles`` must exist before the files are parsed, because
        # csv_to_dict records every title it sees there.
        self.titles = []
        self.data = {}
        self._data = [self.csv_to_dict(f) for f in csv_files]

    @staticmethod
    def _open_csv(path):
        """Open *path* for reading in the mode the csv module needs on this interpreter."""
        if sys.version_info[0] >= 3:
            return open(path, 'r', newline='')
        # Python 2: the csv module requires binary mode.
        return open(path, 'rb')

    def csv_to_dict(self, csv_file):
        """Return ``{title: int(value)}`` for *csv_file*, recording new titles in ``self.titles``."""
        data = {}
        with self._open_csv(csv_file) as fb:
            csv_reader = csv.reader(fb)

            if self.has_header:
                # Skip the header; tolerate an empty file.
                next(csv_reader, None)

            for row in csv_reader:
                if not row:
                    # Blank line.
                    continue

                key = row[0]
                if self.fields is not None and key not in self.fields:
                    continue

                if key not in self.titles:
                    self.titles.append(key)

                data[key] = int(row[1])

        return data

    def sum(self, oper='add'):
        """Fold every title's per-file values with ``operator.<oper>``.

        A file that lacks a title contributes 0 for it.  The result is stored
        in ``self.data`` and returned as ``{title: combined_value}``.
        """
        # Builtin on Python 2, functools-only on Python 3; this import works on both.
        from functools import reduce

        combine = getattr(operator, oper)
        result = {}
        for title in self.titles:
            result[title] = reduce(combine,
                                   [d.get(title, 0) for d in self._data])
        self.data = result
        return self.data

def main():
    """Build the daily report archive and mail it.

    Steps:
      1. For every host id, look in ``<UPLOAD_DIR>/<host>/<today>/res`` for
         ``.csv`` files whose name contains ``<digits>-<yesterday>``.
      2. Write two summaries under CWD: ``game_sum.csv`` (files whose basename
         does not start with ``economy``) and ``economy_sum.csv`` (those that do).
      3. Zip the collected files plus both summaries into ``report.zip``.
      4. Send the archive by mail, trying up to three times; failures are
         reported on stderr and never raised.
    """
    ids = []
    csv_files = []
    output_file = os.path.join(CWD, 'report.zip')
    day_dir = today.strftime('%Y%m%d')
    # Built once for all hosts; the date is escaped so that characters such as
    # '.' in the formatted date are matched literally.
    wanted = re.compile(r'\d+-%s' % re.escape(yesterday.strftime(day_fmt)))

    for host_id in HOST_IDS:
        csv_dir = os.path.join(UPLOAD_DIR, host_id, day_dir, 'res')
        # isdir (not exists): a plain file at this path would break listdir.
        if not os.path.isdir(csv_dir):
            print("[WARN] can not found directory: %s" % (csv_dir))
            continue
        files = [os.path.join(csv_dir, f) for f in os.listdir(csv_dir)
                 if f.endswith('.csv') and wanted.search(f)]
        if files:
            ids.append(host_id)

        csv_files.extend(files)

    ids.sort()

    def is_economy(path):
        # Economy reports are recognised by their file-name prefix.
        return os.path.basename(path).startswith('economy')

    sum_files = [os.path.join(CWD, 'game_sum.csv'),
                 os.path.join(CWD, 'economy_sum.csv')]

    # Gameplay statistics
    game_sum = SumData([f for f in csv_files if not is_economy(f)],
                       rules={to_gbk(u'登录uid数'): lambda x: True})

    game_sum.as_csv(sum_files[0])

    # Economy system statistics
    economy_sum = SumData([f for f in csv_files if is_economy(f)],
                          rules={to_gbk(u'总回收'): lambda x: x <= 0,
                                 to_gbk(u'总产出'): lambda x: x > 0,
                                 to_gbk(u'金钱当天结余'): lambda x: False})

    economy_sum.as_csv(sum_files[1])

    csv_files.extend(sum_files)
    print(csv_files)
    zipfiles(output_file, csv_files)

    subject = '%s 日志扫档统计报告' % yesterday.strftime(day_fmt)
    body = 'Hi, all\n\n 每天日志扫档汇总,详细请下载邮件附件查看。\n\n 扫描服务器列表: \n%s ' % '\n'.join(ids)

    # Three attempts; the number reported is how many attempts remain.
    for retries_left in (2, 1, 0):
        try:
            sendmail(MAIL_CONFIG['smtp_username'],
                     MAIL_LIST,
                     subject,
                     body,
                     attachments=[output_file])
        except Exception as e:
            # Top-level boundary: report and retry rather than abort the job.
            sys.stderr.write("[ERR] Error occured, re-try %d\n" % retries_left)
            sys.stderr.write("%s\n" % (e,))
        else:
            break
    else:
        # Loop finished without a successful send.
        sys.stderr.write("[ERR] Some error occured when sending email.\n")

if __name__ == '__main__':
    main()