# Revision stamp for this script; nothing in the visible code reads it.
version = '2020_06_13_00:00'
import re
from collections import defaultdict
from itertools import count
from datetime import time
# Accumulator keyed by the captured ctx_class text (plus the synthetic 'All'
# key): each entry holds the number of matching lines and their summed
# processtime. New keys start at zero thanks to the defaultdict factory.
stat_data = defaultdict(lambda: {'count': 0, 'processtime': 0.0})

# Captures only ctx_class and processtime from the JSON payload of a line.
# Note the ctx_name group is filled from the "ctx_class" field and keeps the
# surrounding double quotes of the JSON value.
EXTRACT_CTX_CLASS = re.compile(r'^.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')

# Same payload fields, preceded by the leading timestamp, e.g. "May 27 06:25:10".
# Month and day are separated by `\s+` (not a single `\s`): traditional syslog
# timestamps pad single-digit days with a space ("Jun  3 06:25:10"), and with a
# single-whitespace separator every such line failed to match and was dropped.
EXTRACT_CTX_CLASS_2 = re.compile(r'^(?P<month>[^\s]+)\s+(?P<day>[^\s]+)\s(?P<str_time>[^\s]+)\s.+\"ctx_class\"\: (?P<ctx_name>[^\,]+),.+\"processtime\"\: (?P<processtime>[^\,]+),.+$')

# Import-time smoke check of the payload regex against one sample log line.
a = 'May 27 06:26:34 qa requests: {"user_id": 0, "msg": "", "ctx_class": "NovakidRoot", "ctx_name": "", "getparameters": "", "http_method": "GET", "ip": "54.72.126.205", "pid": 25977, "processtime": 0.000811, "replycode": 200, "req_body": "", "time": 1590560794.5993605, "url": "/", "user_agent": "Zabbix"}'
b = EXTRACT_CTX_CLASS.match(a)
print(b)
print(b.groups())

file_name = 'syslog.txt'  # log file to scan
c = count()               # running line counter, starts at 0
step = 100000             # emit a progress dump every `step` lines

# Time-of-day filters as inclusive (low, high) pairs; an empty list disables
# that filter. Flip the literal to True to aggregate the whole file.
if False:
    minutes_filter = []
    hours_filter = []
else:
    minutes_filter = [(25, 29), (55, 59)]
    hours_filter = [(13, 17)]
def show_stat(stat_data, n=None):
    """Print per-context statistics, largest summed processtime first.

    Args:
        stat_data: Mapping of context key -> dict with 'count' and
            'processtime' entries.
        n: Maximum number of rows to print. ``None`` prints every row.

    Each row is printed as a dict with the keys 'ctx_class', 'count' and
    'processtime'. Rows with equal processtime keep the mapping's iteration
    order, because the sort is stable.
    """
    rows = sorted(
        (
            {'ctx_class': name, 'count': stat['count'], 'processtime': stat['processtime']}
            for name, stat in stat_data.items()
        ),
        key=lambda row: row['processtime'],
        reverse=True,
    )
    # A slice bound of None already means "no limit", so no special case is needed.
    for row in rows[:n]:
        print(row)
def _in_any_range(value, ranges):
    """Return True if *ranges* is empty or *value* lies in one inclusive (low, high) pair."""
    return not ranges or any(low <= value <= high for low, high in ranges)


# Scan the log line by line, accumulating count and summed processtime per
# ctx_class (and under the synthetic 'All' key) for lines whose timestamp
# passes the hour and minute filters.
# errors='replace': a single undecodable byte in the log should not abort the
# whole scan.
with open(file_name, 'r', errors='replace') as f:
    for s in f:
        all_count = next(c)

        # Cheap substring test first so the regex only runs on candidate lines.
        matched_key = EXTRACT_CTX_CLASS_2.match(s) if 'ctx_class' in s else None
        if matched_key:
            month, day, str_time, ctx_name, processtime = matched_key.groups()
            split_time = str_time.split(':')
            hours = int(split_time[0])
            minutes = int(split_time[1])

            if _in_any_range(hours, hours_filter) and _in_any_range(minutes, minutes_filter):
                elapsed = float(processtime)
                # Same order as before (specific key, then 'All') so that the
                # insertion order used for sort ties is unchanged.
                for key in (ctx_name, 'All'):
                    ctx_name_stat = stat_data[key]
                    ctx_name_stat['count'] += 1
                    ctx_name_stat['processtime'] += elapsed

        # Progress dump every `step` lines. It is evaluated for every line,
        # including skipped or filtered ones; previously those lines jumped
        # past this check via `continue`, so dumps were silently missed.
        # NOTE(review): the separator is assumed to belong to the progress
        # dump - confirm against the intended output.
        if all_count % step == 0:
            show_stat(stat_data, n=7)
            print("#"*10)

# Final report: active filters followed by the top 100 contexts.
print('filters:')
print(f'hours_filter: {hours_filter}')
print(f'minutes_filter: {minutes_filter}')
show_stat(stat_data, n=100)