# Created by Denis Kirichenko 2020-07-23
# stat_api.py
# version = '2020_06_13_00:00'
import re
from collections import defaultdict
from itertools import count
from datetime import time

# stat_data = defaultdict(lambda: defaultdict(dict))
# stat_data = defaultdict(lambda: defaultdict(lambda: {'count': 0, 'processtime': 0.0}))
stat_data = defaultdict(lambda: {'count': 0, 'processtime': 0.0})
celery_stat_data = defaultdict(lambda: {'count': 0, 'processtime': 0.0})


EXTRACT_CTX_CLASS = re.compile(r'^.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')
# May 27 06:25:10
EXTRACT_CTX_CLASS_2 = re.compile(r'^(?P<month>[^\s]+)\s(?P<day>[^\s]+)\s(?P<str_time>[^\s]+)\s.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"http_method\": (?P<http_method>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')
# EXTRACT_CTX_CLASS_3 = re.compile(r'^(?P<datetime>[^\s]+\s[^\s]+\s[^\s]+)\s.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')
a = 'May 27 06:26:34 qa requests: {"user_id": 0, "msg": "", "ctx_class": "NovakidRoot", "ctx_name": "", "getparameters": "", "http_method": "GET", "ip": "54.72.126.205", "pid": 25977, "processtime": 0.000811, "replycode": 200, "req_body": "", "time": 1590560794.5993605, "url": "/", "user_agent": "Zabbix"}'
b = EXTRACT_CTX_CLASS.match(a)
CELERY_TASKS = re.compile(r'^(?P<month>[^\s]+)\s(?P<day>[^\s]+)\s(?P<str_time>[^\s]+).*celery:.*\"processtime\"\: (?P<processtime>[^\,]+),.*\"task\"\: (?P<task>[^\,]+).*$')
celery = """Jul 22 12:20:13 qa novakid.backend.celery: {"user_id": -1, "msg": "Task finished", "error": "", "pid": 15854, "processtime": 0.006001, "task": "novakid.backend.facebook.tasks.process_lead", "task_args": "({'created_time': '2020-05-31T13:36:24+0000', 'id': '550532535659457', 'ad_id': '23844785264560091', 'form_id': '1168906423443622', 'field_data': [{'name': 'full_name', 'values': ['Petra Zemkov\u00e1']}, {'name': 'phone_number', 'values': ['+420774020277']}, {'name': 'email', 'values': ['zemkova.petra@gmail.com']}]},)", "task_id": "c027e67f-414d-4cdc-805a-c4163c50969e", "task_kwargs": "{'facebook_app_name': 'global_page'}"}"""

#Jul 22 12:27:00 qa novakid.backend.utils: {"user_id": -1, "msg": "used_memory_vms::before::novakid.backend.classes.tasks.other_tasks.sync_class_pings_through_redis 484 MB", "pid": 15854}
#Jul 22 12:25:00 qa novakid.backend.celery: {"user_id": -1, "msg": "Task added", "pid": 15401, "task": "novakid.backend.classes.tasks.other_tasks.sync_class_pings_through_redis", "task_args": "()", "task_id": "0a9d7063-8a25-4526-bc28-586de087684b", "task_kwargs": "{}"}
#Jul 22 12:20:13 qa novakid.backend.celery: {"user_id": -1, "msg": "Task finished", "error": "", "pid": 15854, "processtime": 0.006001, "task": "novakid.backend.facebook.tasks.process_lead", "task_args": "({'created_time': '2020-05-31T13:36:24+0000', 'id': '550532535659457', 'ad_id': '23844785264560091', 'form_id': '1168906423443622', 'field_data': [{'name': 'full_name', 'values': ['Petra Zemkov\u00e1']}, {'name': 'phone_number', 'values': ['+420774020277']}, {'name': 'email', 'values': ['zemkova.petra@gmail.com']}]},)", "task_id": "c027e67f-414d-4cdc-805a-c4163c50969e", "task_kwargs": "{'facebook_app_name': 'global_page'}"}

print(b)
print(b.groups())

c = CELERY_TASKS.match(celery)
print(c)
print(c.groups())


file_name = 'syslog.1'
c = count()
step = 100000

if True:
    minutes_filter = []
    hours_filter = []
else:
    minutes_filter = [(25, 29), (55, 59)]
    hours_filter = [(13, 17)]

SORT_KEY = 'processtime'

def show_stat(stat_data, n=None):
    sorted_stat = [{'name': k, 'count': v['count'], 'processtime': v['processtime'], 'average': v['processtime']/v['count']} for k, v in stat_data.items()]
    # print(sorted_stat)
    sorted_stat.sort(key=lambda k: k['average'], reverse=True)
    # print(sorted_stat)
    if n is None:
        n = len(sorted_stat)
    for x in sorted_stat[:n]:
        print(x)


def check_time_filter(hour_filters, minute_filters, str_time):
    if len(hour_filters) == 0 and len(minute_filters) == 0:
        return True

    split_time = str_time.split(':')
    hours = int(split_time[0])
    minutes = int(split_time[1])

    if hours_filter:
        ignore = True
        for x in hours_filter:
             if x[0]<= hours <= x[1]:
                ignore = False
                break
        if ignore:
            return False

        if minutes_filter:
            ignore = True
            for x in minutes_filter:
                if x[0]<= minutes <= x[1]:
                     ignore = False
                     break
            if ignore:
                return False

    return True

with open(file_name, 'r') as f:
    for s in f:
        all_count = next(c)
        #if 'ctx_class' not in s:
        #    continue
        # matched_key = EXTRACT_CTX_CLASS.match(s)
        matched_key = EXTRACT_CTX_CLASS_2.match(s)
        if matched_key:
            month, day, str_time, ctx_name, http_method, processtime = matched_key.groups()
            if not check_time_filter(hours_filter, minutes_filter, str_time):
                continue

            name = ctx_name + " " + http_method
            ctx_name_stat = stat_data[name]
            ctx_name_stat['count'] += 1
            ctx_name_stat['processtime'] += float(processtime)
            ctx_name_stat = stat_data['All']
            ctx_name_stat['count'] += 1
            ctx_name_stat['processtime'] += float(processtime)
            #if all_count % step == 0:
            #    show_stat(stat_data, n=7)
            #    print("#"*10)
        else:
            matched_key = CELERY_TASKS.match(s)
            if matched_key:
                month, day, str_time, processtime, task = matched_key.groups()
                if not check_time_filter(hours_filter, minutes_filter, str_time):
                    continue

                #import pdb
                #pdb.set_trace()
                name = task
                celery_task_stat = celery_stat_data[name]
                celery_task_stat['count'] += 1
                celery_task_stat['processtime'] += float(processtime)
                celery_task_stat = celery_stat_data['All']
                celery_task_stat['count'] += 1
                celery_task_stat['processtime'] += float(processtime)

print('filters:')
print(f'hours_filter: {hours_filter}')
print(f'minutes_filter:  {minutes_filter}')
print("request stat")
show_stat(stat_data, n=100)
print("\n\ncelery stat:\n\n")
show_stat(celery_stat_data, n=100)

    
Comments (0)

HTTPS
SSH
You can clone a snippet to your computer for local editing. Learn more.
Snippets

ASD Technologies Kr95MA: Untitled snippet

Comments (0)