Snippets

ASD Technologies on54ba: Untitled snippet

Created by Denis Kirichenko
# Version stamp of this script; the value looks like a YYYY_MM_DD_HH:MM timestamp.
version = '2020_06_13_00:00'
import argparse
import sys
import re
from collections import defaultdict
from itertools import count
from datetime import time

# Earlier layouts of the accumulator, kept for reference:
# stat_data = defaultdict(lambda: defaultdict(dict))
# stat_data = defaultdict(lambda: defaultdict(lambda: {'count': 0, 'processtime': 0.0}))

# Per-name accumulators. Each entry holds the number of matched log lines
# ('count'), the summed processing time ('processtime') and a nested
# per-parameter accumulator of the same shape ('params_stat').
stat_data = defaultdict(lambda: {'count': 0, 'processtime': 0.0, 'params_stat': defaultdict(lambda: {'count': 0, 'processtime': 0.0})})
celery_stat_data = defaultdict(lambda: {'count': 0, 'processtime': 0.0, 'params_stat': defaultdict(lambda: {'count': 0, 'processtime': 0.0})})


# Extracts only the "ctx_class" and "processtime" values from a request line.
EXTRACT_CTX_CLASS = re.compile(r'^.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')
# Same as above, plus the syslog timestamp prefix (e.g. "May 27 06:25:10")
# and the "http_method" value.
EXTRACT_CTX_CLASS_2 = re.compile(r'^(?P<month>[^\s]+)\s(?P<day>[^\s]+)\s(?P<str_time>[^\s]+)\s.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"http_method\": (?P<http_method>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')

# Raw strings: the pattern text is unchanged, but "\," inside a normal string
# literal is an invalid escape sequence and triggers a warning on modern Python.
# Note that neither value may contain a comma for these patterns to match.
GET_DATA = re.compile(r'^.+"getparameters": "(?P<getparameters>[^\,]*)".+')
POST_BODY = re.compile(r'^.+"req_body": "(?P<req_body>[^\,]+)".+')

# Sample request line used for the start-up self-check below.
a = 'May 27 06:26:34 qa requests: {"user_id": 0, "msg": "", "ctx_class": "NovakidRoot", "ctx_name": "", "getparameters": "", "http_method": "GET", "ip": "54.72.126.205", "pid": 25977, "processtime": 0.000811, "replycode": 200, "req_body": "", "time": 1590560794.5993605, "url": "/", "user_agent": "Zabbix"}'
b = EXTRACT_CTX_CLASS.match(a)

# Matches "Task finished"-style celery lines: timestamp prefix, "processtime"
# and "task" values.
CELERY_TASKS = re.compile(r'^(?P<month>[^\s]+)\s(?P<day>[^\s]+)\s(?P<str_time>[^\s]+).*celery:.*\"processtime\"\: (?P<processtime>[^\,]+),.*\"task\"\: (?P<task>[^\,]+).*$')
# Sample celery line used for the start-up self-check below.
celery = """Jul 22 12:20:13 qa novakid.backend.celery: {"user_id": -1, "msg": "Task finished", "error": "", "pid": 15854, "processtime": 0.006001, "task": "novakid.backend.facebook.tasks.process_lead", "task_args": "({'created_time': '2020-05-31T13:36:24+0000', 'id': '550532535659457', 'ad_id': '23844785264560091', 'form_id': '1168906423443622', 'field_data': [{'name': 'full_name', 'values': ['Petra Zemkov\u00e1']}, {'name': 'phone_number', 'values': ['+420774020277']}, {'name': 'email', 'values': ['zemkova.petra@gmail.com']}]},)", "task_id": "c027e67f-414d-4cdc-805a-c4163c50969e", "task_kwargs": "{'facebook_app_name': 'global_page'}"}"""

# Other log line shapes seen in the input (not matched by CELERY_TASKS unless
# they carry both "processtime" and "task"):
#Jul 22 12:27:00 qa novakid.backend.utils: {"user_id": -1, "msg": "used_memory_vms::before::novakid.backend.classes.tasks.other_tasks.sync_class_pings_through_redis 484 MB", "pid": 15854}
#Jul 22 12:25:00 qa novakid.backend.celery: {"user_id": -1, "msg": "Task added", "pid": 15401, "task": "novakid.backend.classes.tasks.other_tasks.sync_class_pings_through_redis", "task_args": "()", "task_id": "0a9d7063-8a25-4526-bc28-586de087684b", "task_kwargs": "{}"}
#Jul 22 12:20:13 qa novakid.backend.celery: {"user_id": -1, "msg": "Task finished", "error": "", "pid": 15854, "processtime": 0.006001, "task": "novakid.backend.facebook.tasks.process_lead", "task_args": "({'created_time': '2020-05-31T13:36:24+0000', 'id': '550532535659457', 'ad_id': '23844785264560091', 'form_id': '1168906423443622', 'field_data': [{'name': 'full_name', 'values': ['Petra Zemkov\u00e1']}, {'name': 'phone_number', 'values': ['+420774020277']}, {'name': 'email', 'values': ['zemkova.petra@gmail.com']}]},)", "task_id": "c027e67f-414d-4cdc-805a-c4163c50969e", "task_kwargs": "{'facebook_app_name': 'global_page'}"}

# Start-up self-check: print what the patterns extract from the sample lines.
# This runs (and prints) on every invocation of the script.
print(b)
print(b.groups())

c = CELERY_TASKS.match(celery)
print(c)
print(c.groups())

# Command-line interface: input file, sort column and two boolean switches.
parser = argparse.ArgumentParser(description='Parse Kibana')
parser.add_argument('-f', '--filename', metavar='FILENAME',
    default='syslog.1', help='File for parsing')
# Restrict the sort key to the columns every printed row actually has;
# anything else would only fail later with a KeyError while sorting.
parser.add_argument('-s', '--sortkey', metavar='SORTKEY',
    default='processtime', choices=('name', 'count', 'processtime', 'average'),
    help='sort key: one of name, count, processtime, average')
parser.add_argument('-p', '--print-params-stat', default=False, action='store_true', help='print params stat flag')
parser.add_argument('-u', '--use-time-filters', default=False, action='store_true', help='use time filters')
# parse_args() reads sys.argv[1:] by default.
parsed = parser.parse_args()
filename = parsed.filename
# Running counter of input lines and the interval intended for periodic
# progress output.
c = count()
step = 100000

# Inclusive (low, high) ranges. When enabled, only lines whose timestamp falls
# inside these hour and minute windows are counted.
if parsed.use_time_filters:
    minutes_filter = [(25, 29), (55, 59)]
    hours_filter = [(13, 17)]
else:
    minutes_filter = []
    hours_filter = []

SORT_KEY = parsed.sortkey
PRINT_PARAMS_STAT = parsed.print_params_stat

def show_stat(stat_data, print_params_stat=True, shift=0, n=None, sort_key=None):
    """Print accumulated statistics, largest first.

    Each entry of ``stat_data`` is printed as a dict with the keys ``name``,
    ``count``, ``processtime`` and ``average`` (``processtime / count``).

    Args:
        stat_data: Mapping of name -> dict with ``count`` and ``processtime``
            (and ``params_stat`` when ``print_params_stat`` is true).
        print_params_stat: When true, each entry's non-empty ``params_stat``
            mapping is printed beneath it, indented by four more spaces.
        shift: Number of spaces to indent every printed row.
        n: Maximum number of rows to print per level; ``None`` prints all.
        sort_key: Row key to sort by in descending order. Defaults to the
            module-level ``SORT_KEY`` chosen on the command line.
    """
    if sort_key is None:
        sort_key = SORT_KEY

    rows = []
    for name, entry in stat_data.items():
        hits = entry['count']
        total = entry['processtime']
        row = {
            'name': name,
            'count': hits,
            'processtime': total,
            # An entry that was created but never incremented has count 0.
            'average': total / hits if hits else 0.0,
        }
        if print_params_stat:
            row['params_stat'] = entry['params_stat']
        rows.append(row)

    rows.sort(key=lambda row: row[sort_key], reverse=True)

    # Slicing with n=None keeps every row, so the limit is passed to the
    # nested level unchanged instead of leaking this level's length into it.
    for row in rows[:n]:
        # Remove the nested mapping so it is not part of the printed row.
        params_stat = row.pop('params_stat', None)
        print(' ' * shift + str(row))
        if params_stat:
            show_stat(params_stat, print_params_stat=False, shift=shift + 4,
                      n=n, sort_key=sort_key)


def check_time_filter(hour_filters, minute_filters, str_time):
    """Tell whether a log timestamp passes the hour and minute filters.

    Args:
        hour_filters: Sequence of inclusive ``(low, high)`` hour ranges; an
            empty sequence disables hour filtering.
        minute_filters: Sequence of inclusive ``(low, high)`` minute ranges;
            an empty sequence disables minute filtering.
        str_time: Time of day as ``"HH:MM[:SS]"``.

    Returns:
        ``True`` when the hour lies in at least one hour range (if any are
        given) and the minute lies in at least one minute range (if any are
        given); ``False`` otherwise.
    """
    if not hour_filters and not minute_filters:
        return True

    split_time = str_time.split(':')
    hours = int(split_time[0])
    minutes = int(split_time[1])

    # Use the arguments rather than module globals, and apply the two filters
    # independently so minute ranges still work without any hour range.
    if hour_filters and not any(rng[0] <= hours <= rng[1] for rng in hour_filters):
        return False
    if minute_filters and not any(rng[0] <= minutes <= rng[1] for rng in minute_filters):
        return False

    return True

# Scan the log once: request lines feed stat_data, celery task lines feed
# celery_stat_data. Lines matching neither pattern are ignored.
with open(filename, 'r') as f:
    for s in f:
        # Advances the line counter; the value is only useful for ad-hoc
        # progress output (e.g. every `step` lines).
        all_count = next(c)

        matched_key = EXTRACT_CTX_CLASS_2.match(s)
        if matched_key:
            month, day, str_time, ctx_name, http_method, processtime = matched_key.groups()
            if not check_time_filter(hours_filter, minutes_filter, str_time):
                continue

            elapsed = float(processtime)
            params_values = []

            # The captured value keeps its JSON quotes, hence '"GET"'.
            if http_method == '"GET"':
                get_data_key = GET_DATA.match(s)
                # GET_DATA does not match when "getparameters" is absent or
                # its value contains a comma; treat that as "no parameters"
                # instead of crashing on None.
                params_list = get_data_key.group('getparameters') if get_data_key else ''
                if params_list:
                    params_values = params_list.split('&')

            name = ctx_name + " " + http_method
            ctx_name_stat = stat_data[name]
            ctx_name_stat['count'] += 1
            ctx_name_stat['processtime'] += elapsed

            # Every query-string parameter is charged the full request time.
            for param in params_values:
                param_stat = ctx_name_stat['params_stat'][param]
                param_stat['count'] += 1
                param_stat['processtime'] += elapsed

            # Grand total across all request lines.
            ctx_name_stat = stat_data['All']
            ctx_name_stat['count'] += 1
            ctx_name_stat['processtime'] += elapsed
        else:
            matched_key = CELERY_TASKS.match(s)
            if matched_key:
                month, day, str_time, processtime, task = matched_key.groups()
                if not check_time_filter(hours_filter, minutes_filter, str_time):
                    continue

                elapsed = float(processtime)
                name = task
                celery_task_stat = celery_stat_data[name]
                celery_task_stat['count'] += 1
                celery_task_stat['processtime'] += elapsed
                # Grand total across all celery task lines.
                celery_task_stat = celery_stat_data['All']
                celery_task_stat['count'] += 1
                celery_task_stat['processtime'] += elapsed

print('filters:')
print(f'hours_filter: {hours_filter}')
print(f'minutes_filter:  {minutes_filter}')
print("request stat")
show_stat(stat_data, n=100, print_params_stat=PRINT_PARAMS_STAT)
print("\n\ncelery stat:\n\n")
show_stat(celery_stat_data, n=100, print_params_stat=False)

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.