woocode / py / sa / simple_cdn_check.py

#!/usr/bin/env python
# -*- encoding:utf-8 -*-

import os
import sys
import re
import traceback
import urllib2
import threading
import Queue
import logging
import time

cwd = os.path.abspath(os.path.dirname(__file__))
logfile = os.path.join(cwd, os.path.splitext(os.path.basename(__file__))[0] + '.log')
logging.basicConfig(filename=logfile,
                    level=logging.INFO)

# DONE: build a multithreaded model with a configurable thread count
# DONE: make sure URL requests use the HEAD method

cdn_ips = (
           '61.187.102.68',
           '218.60.35.175',
           '218.8.52.229',
           '61.138.133.14',
           '221.204.210.146',
           '221.204.210.147',
           '60.5.252.167',
           '221.195.3.139',
           '60.210.16.87',
           '60.210.16.88',
           '60.165.99.197',
           '222.208.168.214',
           '222.208.168.215',
           '218.6.12.169',
           # '222.186.34.47' --> '222.186.58.206',
           '222.186.58.206',
           '58.221.38.68',
           # '123.183.210.136', temporarily removed
           '60.173.11.162',
           '59.53.65.14',
           # '61.183.42.56' -->'61.183.41.216',
           '61.183.41.216',
           '121.9.221.88',
           '121.9.221.89',
           '220.165.3.73',
           # '218.29.176.196' --> '42.227.234.137',
           '42.227.234.137',
           '218.77.101.34')
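
# The per-node check in miniature (illustrative snippet; uses the first
# IP from the list above and the HeadRequest class defined below):
#
#   opener = urllib2.build_opener(urllib2.ProxyHandler({'http': '61.187.102.68'}))
#   resp = opener.open(HeadRequest('http://download1.fs.175game.com/'))
#   print resp.code   # 200 when this edge node answers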

def sanity(ver):
    '''
    >>> sanity('mini_cn_v_2_9_0')
    ('mini_cn', 'v', '2.9.0')
    >>> sanity('v_2_9_0')
    ('mini_cn', 'v', '2.9.0')
    >>> sanity('v2.9.0')
    ('mini_cn', 'v', '2.9.0')
    >>> sanity('2.9.0')
    ('mini_cn', 'v', '2.9.0')
    '''
    t = 'mini_cn'
    v = '2.9.0'
    pat1 = re.compile(r'(\w+_\w+)_v_(\d+[_\.]\d+[_\.]\d+)')
    pat2 = re.compile(r'(v[_\.])?(\d+[\._]\d+[\._]\d+)')
    pat3 = re.compile(r'_')
    if pat1.search(ver):
        t, v = pat1.search(ver).groups()
    elif pat2.search(ver):
        v = pat2.search(ver).group(2)
    else:
        raise ValueError('unrecognized version format %r' % ver)
    v = pat3.sub(r'.', v)
    return (t, 'v', v)
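
# The doctests in sanity() can be exercised standalone (Python 2.6+):
#
#   python -m doctest simple_cdn_check.py -v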

def get_resources(path, excludes=None):
    '''
    Yield every resource under the given root directory.
    Paths are yielded relative to that root.
    '''
    excludes = excludes or []
    if os.path.exists('obj.pkl'):
        # Reuse a previously pickled list of absolute resource paths.
        import pickle
        with open('obj.pkl', 'rb') as fb:
            resources = pickle.load(fb)
            for rsc in resources:
                relpath = rsc[len(path):].lstrip('/')
                pardir = relpath.split('/', 1)[0]
                if pardir in excludes:
                    continue
                yield relpath
    else:
        for root, dirs, files in os.walk(path):
            # Apply the same top-level excludes filter as the pickle branch.
            if root == path:
                dirs[:] = [d for d in dirs if d not in excludes]
            for fn in files:
                yield os.path.join(root, fn)[len(path):].lstrip('/')
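
# A minimal sketch of how the 'obj.pkl' cache consumed above could be
# produced (hypothetical helper, not part of the original script). It
# pickles absolute paths, matching what get_resources() strips the root
# prefix from when reading the cache back:
def build_resource_cache(path, pkl='obj.pkl'):
    '''Walk `path` and pickle the absolute file list for later runs.'''
    import pickle
    resources = [os.path.join(root, fn)
                 for root, dirs, files in os.walk(path)
                 for fn in files]
    with open(pkl, 'wb') as fb:
        pickle.dump(resources, fb)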

class ThreadUrlFetch(threading.Thread):
    def __init__(self, fetch, queue):
        threading.Thread.__init__(self)
        self.fetch = fetch
        self.queue = queue

    def run(self):
        while True:
            # Block until a request is available on the queue.
            request = self.queue.get(True)
            status = 'ERR'
            try:
                u = self.fetch.open(request)
                status = u.msg
                assert u.code == 200, "server response code not 200"
                msg = "{0} - [{1}] {2:<15} {3}".format(self.getName(),
                                                       status,
                                                       self.fetch.handlers[0].proxies['http'],
                                                       request.get_full_url())
                # if request.get_method() == 'GET':
                #     # TODO: the response body could be verified here
                #     raise ValueError('Need to implement')
                u.close()
                logging.info(msg)
            except urllib2.URLError:
                # URLError also covers HTTPError, so connection failures
                # and bad HTTP status codes are both reported here.
                formatted_lines = traceback.format_exc().splitlines()
                print '*' * 30
                msg = "{0} - [{1}] {2:<15} {3} [{4}]".format(self.getName(),
                                                             status,
                                                             self.fetch.handlers[0].proxies['http'],
                                                             request.get_full_url(),
                                                             formatted_lines[-1])

                print >> sys.stderr, msg
                print '*' * 30
                logging.error(msg)
            finally:
                self.queue.task_done()

class HeadRequest(urllib2.Request):
    '''HEAD Request'''
    def get_method(self):
        return "HEAD"

def main():
    start = time.time()

    # default number of worker threads
    threads_num = 10
    # default version
    _version = '2.11.0'
    # CDN download base URL
    cdn_link = 'http://download1.fs.175game.com'
    # resource root directory
    resource_root_dir = '/home/miniclient/rsync_input/mini.fs.175game.com/mini_cn'
    if len(sys.argv) < 2:
        print >> sys.stderr, "Please specify a version, like: v2.11.0"
        sys.exit(-1)
    else:
        _version = sys.argv[1]

    client_type, _v, version = sanity(_version)
    underline_version = version.replace('.', '_')
    print "Use version: %s_%s_%s" % (client_type, _v, underline_version)
    print "Use threads: %d" % threads_num
    # client_type = 'mini_cn'

    # Wrap each CDN node IP as an HTTP proxy mapping so every request is
    # routed through that node while the URL (and Host) stay the same.
    cdn_proxies = ({'http': ip} for ip in cdn_ips)

    # resource publish path
    resource_pub_dir = 'fs/%s_v_%s' % (client_type, underline_version)

    resources = get_resources(resource_root_dir, ['resource'])
    resources = list(resources)
    print 'Resources to check: %d' % len(resources)
    resource_links = ['/'.join([cdn_link, resource_pub_dir, r]) for r in resources]
    # resource_links = ['http://download1.fs.175game.com/fs/mini_cn_v_2_11_0/client/bin/release/fsresd.exe']

    for proxy in cdn_proxies:
        _start = time.time()
        # print 'Starting check proxy [%s]' % proxy['http']
        proxy_opener = urllib2.build_opener(urllib2.ProxyHandler(proxy))
        queue = Queue.Queue()
        for u in resource_links:
            queue.put(HeadRequest(u))
        for i in range(threads_num):
            t = ThreadUrlFetch(proxy_opener, queue)
            # Daemon threads: workers from earlier proxies stay blocked on
            # their drained queues and simply die when the process exits.
            t.setDaemon(True)
            t.start()
        queue.join()
        print 'Done [%s] %.2f seconds' % (proxy['http'].ljust(12), time.time() - _start)

    print 'Finished'
    print 'Time elapsed %.2f seconds' % (time.time() - start)
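
# Typical invocation (the version argument is normalized by sanity()):
#
#   python simple_cdn_check.py v2.11.0
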
if __name__ == '__main__':
    main()