# Snippet created by Denis Kirichenko
version = '2020_06_13_00:00'
import argparse
import sys
import re
from collections import defaultdict
from itertools import count
from datetime import time
def _new_counters():
    """Return a fresh ``{'count': 0, 'processtime': 0.0}`` accumulator."""
    return {'count': 0, 'processtime': 0.0}


def _new_stat_entry():
    """Return a fresh accumulator that also carries per-parameter counters.

    The ``'params_stat'`` value is itself a defaultdict, so a counter for a
    previously unseen parameter string is created on first access.
    """
    entry = _new_counters()
    entry['params_stat'] = defaultdict(_new_counters)
    return entry


# name -> {'count', 'processtime', 'params_stat': {param -> {'count', 'processtime'}}}
stat_data = defaultdict(_new_stat_entry)
celery_stat_data = defaultdict(_new_stat_entry)

# All patterns are raw strings so that regex escapes are never interpreted
# (or warned about) as string-literal escapes.
EXTRACT_CTX_CLASS = re.compile(r'^.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')
# Same as above, but also captures the leading "May 27 06:25:10" timestamp
# and the HTTP method.
EXTRACT_CTX_CLASS_2 = re.compile(r'^(?P<month>[^\s]+)\s(?P<day>[^\s]+)\s(?P<str_time>[^\s]+)\s.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"http_method\": (?P<http_method>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')
# NOTE: the captured value may not contain a comma, so a line whose
# "getparameters" value contains one does not match at all.
GET_DATA = re.compile(r'^.+"getparameters": "(?P<getparameters>[^,]*)".+')
POST_BODY = re.compile(r'^.+"req_body": "(?P<req_body>[^,]+)".+')
CELERY_TASKS = re.compile(r'^(?P<month>[^\s]+)\s(?P<day>[^\s]+)\s(?P<str_time>[^\s]+).*celery:.*\"processtime\"\: (?P<processtime>[^\,]+),.*\"task\"\: (?P<task>[^\,]+).*$')

# Sample request line, used below as an import-time check of the patterns.
a = 'May 27 06:26:34 qa requests: {"user_id": 0, "msg": "", "ctx_class": "NovakidRoot", "ctx_name": "", "getparameters": "", "http_method": "GET", "ip": "54.72.126.205", "pid": 25977, "processtime": 0.000811, "replycode": 200, "req_body": "", "time": 1590560794.5993605, "url": "/", "user_agent": "Zabbix"}'
b = EXTRACT_CTX_CLASS.match(a)

# Sample celery "Task finished" line (the only sample shape that carries a
# "processtime" field).
celery = """Jul 22 12:20:13 qa novakid.backend.celery: {"user_id": -1, "msg": "Task finished", "error": "", "pid": 15854, "processtime": 0.006001, "task": "novakid.backend.facebook.tasks.process_lead", "task_args": "({'created_time': '2020-05-31T13:36:24+0000', 'id': '550532535659457', 'ad_id': '23844785264560091', 'form_id': '1168906423443622', 'field_data': [{'name': 'full_name', 'values': ['Petra Zemkov\u00e1']}, {'name': 'phone_number', 'values': ['+420774020277']}, {'name': 'email', 'values': ['zemkova.petra@gmail.com']}]},)", "task_id": "c027e67f-414d-4cdc-805a-c4163c50969e", "task_kwargs": "{'facebook_app_name': 'global_page'}"}"""
# Other line shapes seen next to it (neither has a "processtime" field):
#Jul 22 12:27:00 qa novakid.backend.utils: {"user_id": -1, "msg": "used_memory_vms::before::novakid.backend.classes.tasks.other_tasks.sync_class_pings_through_redis 484 MB", "pid": 15854}
#Jul 22 12:25:00 qa novakid.backend.celery: {"user_id": -1, "msg": "Task added", "pid": 15401, "task": "novakid.backend.classes.tasks.other_tasks.sync_class_pings_through_redis", "task_args": "()", "task_id": "0a9d7063-8a25-4526-bc28-586de087684b", "task_kwargs": "{}"}

# Import-time self-check: print what the patterns extract from the samples.
print(b)
print(b.groups())
c = CELERY_TASKS.match(celery)
print(c)
print(c.groups())
# ---- command-line interface ----
parser = argparse.ArgumentParser(description='Parse Kibana')
parser.add_argument(
    '-f', '--filename', metavar='FILENAME',
    default='syslog.1', help='File for parsing',
)
# Restricting the choices turns a misspelled key into an immediate usage
# error instead of a KeyError once the whole file has been parsed.
parser.add_argument(
    '-s', '--sortkey', metavar='SORTKEY', default='processtime',
    choices=('name', 'count', 'processtime', 'average'),
    help='sort key: one of name, count, processtime, average',
)
parser.add_argument(
    '-p', '--print-params-stat', default=False, action='store_true',
    help='print params stat flag',
)
parser.add_argument(
    '-u', '--use-time-filters', default=False, action='store_true',
    help='use time filters',
)
parsed = parser.parse_args(sys.argv[1:])

filename = parsed.filename
c = count()      # running line counter
step = 100000    # interval for the (currently disabled) periodic progress dump

# Inclusive (low, high) ranges; an empty list disables that filter.
if parsed.use_time_filters:
    minutes_filter = [(25, 29), (55, 59)]
    hours_filter = [(13, 17)]
else:
    minutes_filter = []
    hours_filter = []

SORT_KEY = parsed.sortkey
PRINT_PARAMS_STAT = parsed.print_params_stat
def show_stat(stat_data, print_params_stat=True, shift=0, n=None, sort_key=None):
    """Print one line per entry of ``stat_data``, largest ``sort_key`` first.

    Args:
        stat_data: mapping of name -> dict with ``'count'`` and
            ``'processtime'`` keys (and ``'params_stat'`` when
            ``print_params_stat`` is true).
        print_params_stat: when true, each entry's non-empty
            ``'params_stat'`` mapping is printed beneath it, indented by
            four spaces, using the same ``n`` limit.
        shift: number of spaces to prefix every printed line with.
        n: maximum number of entries to print; ``None`` prints all.
        sort_key: row field to sort by (``'name'``, ``'count'``,
            ``'processtime'`` or ``'average'``); ``None`` falls back to the
            module-level ``SORT_KEY``.
    """
    if sort_key is None:
        sort_key = SORT_KEY

    rows = []
    for name, totals in stat_data.items():
        hits = totals['count']
        row = {
            'name': name,
            'count': hits,
            'processtime': totals['processtime'],
            # A zero-count entry (e.g. one created by a bare defaultdict
            # lookup) must not abort the whole report.
            'average': totals['processtime'] / hits if hits else 0.0,
        }
        if print_params_stat:
            row['params_stat'] = totals['params_stat']
        rows.append(row)

    rows.sort(key=lambda row: row[sort_key], reverse=True)
    if n is None:
        n = len(rows)

    indent = ' ' * shift
    for row in rows[:n]:
        # Detach the nested mapping so it is not part of the printed row.
        params_stat = row.pop('params_stat', None)
        print(indent + str(row))
        if params_stat:
            show_stat(params_stat, print_params_stat=False, shift=4, n=n,
                      sort_key=sort_key)
def _in_any_range(value, ranges):
    """Return True if ``value`` lies in at least one inclusive (low, high) range."""
    return any(bounds[0] <= value <= bounds[1] for bounds in ranges)


def check_time_filter(hour_filters, minute_filters, str_time):
    """Return True if ``str_time`` passes the hour and minute filters.

    Args:
        hour_filters: list of inclusive ``(low, high)`` hour ranges; an
            empty list means "any hour".
        minute_filters: list of inclusive ``(low, high)`` minute ranges; an
            empty list means "any minute".
        str_time: time of day formatted as ``HH:MM[:SS]``. It is not parsed
            at all when both filter lists are empty.

    Returns:
        True when every non-empty filter has at least one range containing
        the corresponding component of ``str_time``; False otherwise.

    Raises:
        ValueError, IndexError: if a filter is active and ``str_time`` is
            not in the expected format.
    """
    if not hour_filters and not minute_filters:
        return True

    parts = str_time.split(':')
    hours = int(parts[0])
    minutes = int(parts[1])

    # Use the arguments, not the module-level filter lists, so the function
    # honours whatever the caller passes in.
    if hour_filters and not _in_any_range(hours, hour_filters):
        return False
    if minute_filters and not _in_any_range(minutes, minute_filters):
        return False
    return True
def _add_sample(bucket, seconds):
    """Add one observation of ``seconds`` to a ``{'count', 'processtime'}`` bucket."""
    bucket['count'] += 1
    bucket['processtime'] += seconds


# ---- parse the log file ----
# errors='replace' keeps a stray undecodable byte in the log from aborting
# the whole run with UnicodeDecodeError.
with open(filename, 'r', errors='replace') as f:
    for all_count, s in enumerate(f):
        matched_key = EXTRACT_CTX_CLASS_2.match(s)
        if matched_key:
            # HTTP request line.
            str_time, ctx_name, http_method, processtime = matched_key.group(
                'str_time', 'ctx_name', 'http_method', 'processtime')
            if not check_time_filter(hours_filter, minutes_filter, str_time):
                continue

            params_values = []
            if http_method == '"GET"':
                get_data_key = GET_DATA.match(s)
                # GET_DATA does not match every request line (e.g. a query
                # string containing a comma); count such a request without
                # a per-parameter breakdown instead of crashing on None.
                params_list = get_data_key.group('getparameters') if get_data_key else ''
                if params_list:
                    params_values = params_list.split('&')

            seconds = float(processtime)
            name = ctx_name + " " + http_method
            ctx_name_stat = stat_data[name]
            _add_sample(ctx_name_stat, seconds)
            for param in params_values:
                _add_sample(ctx_name_stat['params_stat'][param], seconds)
            # Grand total over all requests.
            _add_sample(stat_data['All'], seconds)
            continue

        matched_key = CELERY_TASKS.match(s)
        if matched_key:
            # Celery task line that carries a processtime.
            str_time, processtime, task = matched_key.group(
                'str_time', 'processtime', 'task')
            if not check_time_filter(hours_filter, minutes_filter, str_time):
                continue
            seconds = float(processtime)
            _add_sample(celery_stat_data[task], seconds)
            # Grand total over all tasks.
            _add_sample(celery_stat_data['All'], seconds)

# ---- report ----
print('filters:')
print(f'hours_filter: {hours_filter}')
print(f'minutes_filter: {minutes_filter}')
print("request stat")
show_stat(stat_data, n=100, print_params_stat=PRINT_PARAMS_STAT)
print("\n\ncelery stat:\n\n")
show_stat(celery_stat_data, n=100, print_params_stat=False)
# Comments (0)
# You can clone a snippet to your computer for local editing. Learn more.