Source

woocode / py / sa / statistics / machine_info_statistics.py

Full commit
#!/usr/bin/env python
# -*- encoding:utf-8 -*-
import tarfile
import re
import time
import sys

from utils import get_files_by_ext

def get_logobj_from_tarfile(filename):
    '''Yield a LogLineGenerator for every readable member of a tar archive.

    filename -- path of the archive; the compression is detected by
                ``tarfile.open``.

    Members for which ``extractfile`` returns no file object (for example
    directories) are skipped.  The archive stays open while the generator
    is alive because each yielded object reads its data from it on demand.
    '''
    tf = tarfile.open(filename)
    # Iterate the TarFile itself rather than its ``members`` list: that list
    # is not part of the documented API and is only filled in as the archive
    # is read, so it can be incomplete right after opening.
    for member in tf:
        extracted = tf.extractfile(member)
        if extracted is None:
            # Nothing to read for this member; LogLineGenerator would
            # reject None with a TypeError.
            continue
        yield LogLineGenerator(extracted)

class LogLineGenerator(object):
    '''Line source backed by an open file object or a file path.'''

    def __init__(self, fobj, log_regpat=None):
        '''Store the line source.

        fobj       -- a ``file`` / ``tarfile.ExFileObject`` instance, which
                      is used as is, or a ``str`` path, which is opened for
                      reading.
        log_regpat -- kept on the instance; nothing in this class reads it.

        Raises TypeError for any other kind of ``fobj``.
        '''
        self.log_regpat = log_regpat
        already_open = isinstance(fobj, (file, tarfile.ExFileObject))
        if not already_open and not isinstance(fobj, str):
            raise TypeError('fobj only support file object or file path. Got: %r' % type(fobj))
        self.fobj = fobj if already_open else open(fobj)

    def get_loglines(self):
        '''Lazily yield each line of the source with surrounding whitespace removed.'''
        for raw_line in self.fobj:
            yield raw_line.strip()

def main():
    '''Summarise the MachineInfo records found in a directory of log archives.

    The directory is taken from ``sys.argv[1]``.  Every ``.tar.gz`` file
    that ``get_files_by_ext`` returns for it is scanned, and each log line
    containing ``MachineInfo=...`` has its device, memory, cpu, screen and
    window fields extracted.

    Files written to the current working directory:
      * statistics.csv -- one fully quoted row per record, with the columns
        device,memory,cpu,screen,window.
      * gpu_device.csv, memory.csv, cpu.csv, screen.csv, window.csv --
        ``value,sum`` rows giving how often each distinct value occurred.

    Records missing any of the five fields are skipped and their number is
    reported on stderr.  The elapsed time of the whole run is printed last.
    '''
    import csv  # function-scope import: only this entry point needs it

    tar_dir = sys.argv[1]
    tar_files = get_files_by_ext(tar_dir, '.tar.gz')
    machine_pat = re.compile(r'MachineInfo=(.+)')

    # One entry per column of statistics.csv, in column order:
    # (column name, file receiving the per-value counts, extraction pattern).
    fields = [
        ('device', 'gpu_device.csv', re.compile(r'device(\[.+?\]),')),
        ('memory', 'memory.csv', re.compile(r'mem\-physic:(.+?),')),
        ('cpu', 'cpu.csv', re.compile(r'(cpu.*?),')),
        ('screen', 'screen.csv', re.compile(r'screen\-(.+?),')),
        ('window', 'window.csv', re.compile(r'window:(.+)')),
    ]
    counts = [{} for _ in fields]
    skipped = 0

    # Start the clock before the loop: the final report then covers the
    # whole run and still works when no archive is found.
    start_time = time.time()

    opened = []
    try:
        # Binary mode is what the csv module expects for output files on
        # the Python 2 interpreter this script targets.
        fp_all = open('statistics.csv', 'wb')
        opened.append(fp_all)
        fp_all.write(','.join(name for name, _, _ in fields) + '\n')

        count_fps = []
        for name, path, _ in fields:
            fp = open(path, 'wb')
            opened.append(fp)
            fp.write('%s,sum\n' % name)
            count_fps.append(fp)

        # QUOTE_ALL keeps the original "always quoted" row layout while
        # also escaping any quote characters embedded in a value.
        all_writer = csv.writer(fp_all, quoting=csv.QUOTE_ALL,
                                lineterminator='\n')

        for tar_file in tar_files:
            print(tar_file)
            for logobj in get_logobj_from_tarfile(tar_file):
                for line in logobj.get_loglines():
                    m = machine_pat.search(line)
                    if m is None:
                        continue
                    machine_info = m.group(1).strip()

                    found = [pat.search(machine_info) for _, _, pat in fields]
                    if any(hit is None for hit in found):
                        # Incomplete record: skip it instead of aborting
                        # the run before any counts have been written.
                        skipped += 1
                        continue

                    values = [hit.group(1).strip() for hit in found]
                    all_writer.writerow(values)
                    for tally, value in zip(counts, values):
                        tally[value] = tally.get(value, 0) + 1

        for fp, tally in zip(count_fps, counts):
            # Default (minimal) quoting: plain values are written exactly
            # as before, values containing commas or quotes stay one cell.
            writer = csv.writer(fp, lineterminator='\n')
            for value, total in tally.items():
                writer.writerow([value, total])
    finally:
        # Close whatever was opened, even if processing failed part-way.
        for fp in opened:
            fp.close()

    if skipped:
        sys.stderr.write(
            'skipped %d MachineInfo line(s) with missing fields\n' % skipped)
    print('elapsed %.2f' % (time.time() - start_time))

# Run the statistics job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()