Parse Docker logs to detect lengthy Orthanc operations

Created by Alain Mazy
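
All three scripts below read log files produced by Docker's json-file logging driver, in which each line is a JSON object carrying the log text and an RFC 3339 timestamp. An illustrative line (the log content is made up):

{"log":"I0226 12:05:02.123456 ServerContext.cpp:412] New instance stored\n","stream":"stderr","time":"2021-02-26T12:05:02.123456789Z"}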
Script 1: flag intervals longer than --deltaMs with no log output, printing the lines around the gap.

import json
import argparse
import pandas as pd

parser = argparse.ArgumentParser(description = "Detects large intervals without any log output in Docker json logs")
parser.add_argument("inputFile", type = str, help = "the log file to parse")
parser.add_argument("--deltaMs", type = int, default = 500, help = "duration of the interval in ms")
parser.add_argument("--previousLogs", type = int, default = 10, help = "number of lines to display before the detected interval")
args = parser.parse_args()

previousTimeStamp = None
previousLogs = []  # rolling window of the last --previousLogs lines

with open(args.inputFile) as f:
    for line in f:

        jsonLine = json.loads(line)

        timeStamp = pd.to_datetime(jsonLine["time"], format = "%Y-%m-%dT%H:%M:%S.%fZ")

        if previousTimeStamp is not None and (timeStamp - previousTimeStamp) > pd.Timedelta(args.deltaMs, unit = "ms"):
            for p in previousLogs:
                print(p["log"].rstrip())
            print(jsonLine["log"].rstrip())
            print("")

        previousTimeStamp = timeStamp

        previousLogs.append(jsonLine)
        if len(previousLogs) > args.previousLogs:
            previousLogs.pop(0)
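
# sample usage (the script and file names are illustrative):
# python gaps.py orthanc-json.log --deltaMs=1000 --previousLogs=20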
Script 2: measure the delay between every log line containing --text and the next log line.

import json
import argparse
import pandas as pd
import statistics

parser = argparse.ArgumentParser(description = "Show average TimeDelta after a given log in Docker json logs")
parser.add_argument("inputFile", type = str, help = "the log file to parse")
parser.add_argument("--text", type = str, help = "text to find in logs")
parser.add_argument("--deltaMs", type = int, default = 100, help = "count the # of logs whose duration is greater than this interval in ms")

args = parser.parse_args()

previousTimeStamp = None
previousLine = None
measures = []
longDurationCount = 0

with open(args.inputFile) as f:
    for line in f:

        jsonLine = json.loads(line)

        timeStamp = pd.to_datetime(jsonLine["time"], format = "%Y-%m-%dT%H:%M:%S.%fZ")

        # measure the delay between a matching line and the very next log line
        if previousLine is not None and args.text in previousLine["log"]:
            ts = (timeStamp - previousTimeStamp).total_seconds() * 1000
            measures.append(ts)
            # print("{m} ms".format(m = measures[len(measures) - 1]))
            if ts > args.deltaMs:
                longDurationCount = longDurationCount + 1

        previousTimeStamp = timeStamp
        previousLine = jsonLine

    print("found {n} occurences of '{t}'".format(n = len(measures), t = args.text))
    print("{n} occurences took more than {d} ms".format(n = longDurationCount, d = args.deltaMs))
    print("average = {m} ms".format(m = statistics.mean(measures)))
    print("stddev = {m} ms".format(m = statistics.stdev(measures)))
    print("max = {m} ms".format(m = max(measures)))
Script 3: measure the time elapsed between a line containing --textFrom and the next line containing --textTo.

# sample usages:
# python interval.py c:\Users\alain\Downloads\delete-lify-json.log --textFrom="POST /instances" --textTo="New instance stored" --format="json"
# python interval.py c:\Users\alain\Downloads\delete-lify-json.log --textFrom="DELETE /" --textTo="of type 2"

import json
import argparse
import pandas as pd
import statistics

parser = argparse.ArgumentParser(description = "Show average TimeDelta between two logs in Docker json logs")
parser.add_argument("inputFile", type = str, help = "the log file to parse")
parser.add_argument("--format", type = str, default = "json", help = "docker logs format json/text")
parser.add_argument("--textFrom", type = str, help = "text to find in logs")
parser.add_argument("--textTo", type = str, help = "text to find in logs")
parser.add_argument("--deltaMs", type = int, default = 100, help = "count the # of logs whose duration is greater than this interval in ms")

args = parser.parse_args()

fromTimeStamp = None
fromLog = None
measures = []
longDurationCount = 0


with open(args.inputFile) as f:
    for line in f:

        if args.format == "json":
            jsonLine = json.loads(line)
            timeStamp = pd.to_datetime(jsonLine["time"], format = "%Y-%m-%dT%H:%M:%S.%fZ")
        else:
            # text format: assume the second whitespace-separated token of each
            # line is the time of day (as in Orthanc's default log layout)
            tsString = line.split(" ")[1]
            timeStamp = pd.to_datetime(tsString, format = "%H:%M:%S.%f")
            jsonLine = {
                "log" : line
            }


        if fromTimeStamp is None and args.textFrom in jsonLine["log"]:
            fromTimeStamp = timeStamp
            fromLog = jsonLine
        if fromTimeStamp is not None and args.textTo in jsonLine["log"]:
            ts = (timeStamp - fromTimeStamp).total_seconds() * 1000
            measures.append(ts)
            # print("{m} ms".format(m = measures[len(measures) - 1]))
            if ts > args.deltaMs:
                longDurationCount = longDurationCount + 1
                print(fromLog["log"].rstrip())
                print(jsonLine["log"].rstrip())
                print("")
            fromTimeStamp = None

    print("found {n} occurences of '{t1}' followed by '{t2}'".format(n = len(measures), t1 = args.textFrom, t2 = args.textTo))
    print("{n} occurences took more than {d} ms".format(n = longDurationCount, d = args.deltaMs))
    print("average = {m} ms".format(m = statistics.mean(measures)))
    print("stddev = {m} ms".format(m = statistics.stdev(measures)))
    print("max = {m} ms".format(m = max(measures)))
