Commits

Stefan Saasen  committed b019390

Add 'requestDurations' command to extract the durations of clone requests

Collect the duration of clone requests/responses (both shallow and normal) in
milliseconds

The output format is:

Date | Duration (cache hit) | Duration (cache miss) | Client IP

  • Participants
  • Parent commits f3128e5

Comments (0)

Files changed (5)

File gnuplot/generate-git-ops-plot.plot

 plot    "plot-git-ops" using 1:6  with lines title "ref advertisement", \
         "plot-git-ops" using 1:11 with lines title "ref advertisement (hit)", \
         "plot-git-ops" using 1:16 with lines title "ref advertisement (miss)"
+
+
+# ===================================================================================
+set format x "%d/%m\n%H:%M"
+set timefmt "%Y-%m-%d %H:%M:%S"
+set output "git-clone-duration.png"
+set title "Duration of clone and shallow clone operations (seconds)"
+
+# Date | Duration (cache hit) | Duration (cache miss) | Client IP | hostname | Provider
+plot    "clone-duration"        using 1:($2/1000)                      with lines title "Clone (cache hit)",\
+        "clone-duration"        using 1:($3/1000)                      with lines title "Clone (cache miss)"
+

File logparser/src/Main.hs

                 appCategories = ["Logfile analysis", "Debug"],
                 appShortDesc = "Logparser for the Atlassian Stash access logs",
                 appLongDesc = "Parses and aggregates the access logs of Atlassian Stash",
-                appCmds = [count, countRequests, maxConn, summarizeGitOperations, summarizeProtocolStats, debugParser ]
+                appCmds = [count, countRequests, maxConn, summarizeGitOperations, requestDurations, summarizeProtocolStats, debugParser ]
         }
 
 
-count, countRequests, maxConn, summarizeGitOperations, summarizeProtocolStats, debugParser :: Command ()
+count, countRequests, maxConn, summarizeGitOperations, requestDurations, summarizeProtocolStats, debugParser :: Command ()
 count = defCmd {
                 cmdName = "count",
                 cmdHandler = commandHandler $ printCountLines countLines,
                 cmdShortDesc = "Aggregate git operations per hour. Show counts for fetch, clone, push, pull and ref advertisement"
         }
 
+requestDurations = defCmd {
+                cmdName = "requestDurations",
+                cmdHandler = commandHandler $ generateCloneRequestDurations cloneRequestDuration,
+                cmdCategory = "Logfile analysis",
+                cmdShortDesc = "Show the duration of clone operations over time"
+        }
+
 summarizeProtocolStats = defCmd {
                 cmdName = "protocolStats",
                 cmdHandler = commandHandler $ generateProtocolData protocolStatsByHour,
         printf "# Date | Max conncurrent connection\n"
         mapM_ (\pd -> printf "%s|%d\n" (formatLogDate $ getLogDate pd) (getValue pd)) plotData
 
+generateCloneRequestDurations :: (Input -> [RequestDurationStat]) -> [FilePath] -> IO ()
+generateCloneRequestDurations f path = do
+        plotData <- liftM f $ toLines path
+        printf "# Date | Duration (cache hit) | Duration (cache miss) | Client IP\n"
+        mapM_ (\pd -> printf "%s|%d|%d|%s\n" (show $ getDurationDate pd) (getDurationHit pd) (getDurationMiss pd) (getClientIp pd)) plotData
+
 parseAndPrint :: (Show a) => (Input -> a) -> [FilePath] -> IO ()
 parseAndPrint f path = print . f . L.lines =<< readFiles path
 

File logparser/src/Stash/Log/GitOpsAnalyser.hs

 module Stash.Log.GitOpsAnalyser
 ( GitOperationStats(..)
 , analyseGitOperations
+, RequestDurationStat(..)
+, cloneRequestDuration
 ) where
 
 import qualified Data.ByteString.Char8 as S
+import Data.String.Utils (split)
 import Data.List (foldl', groupBy)
 import Data.Maybe (isJust)
 import Data.Function (on)
     ,cacheHits                  :: [Int]
 }
 
+data RequestDurationStat = RequestDurationStat {
+    getDurationDate             :: !LogDate
+   ,getClientIp                 :: !String
+   ,getDurationHit              :: !Int
+   ,getDurationMiss             :: !Int
+}
+
 -- | Parse and aggregate the log file input into a list of hourly GitOperationStats
 analyseGitOperations :: Input -> [GitOperationStats]
 analyseGitOperations rawLines =
                             (getDay date) (getHour date)
     in analyseGitOperations' logDateEqHour formatLogDate rawLines
 
+-- | Return the duration of clone (clone and shallow clone) operations
+cloneRequestDuration :: Input -> [RequestDurationStat]
+cloneRequestDuration rawLines = collectRequestDurations rawLines authenticatedClone
+
+
 -- =================================================================================
 
+authenticatedClone :: LogLine -> Bool
+authenticatedClone line = isJust (getUsername line) && (isShallowClone line || isClone line)
+
 emptyStats :: GitOperationStats
 emptyStats = GitOperationStats "" zero zero
-        where zero = replicate 5 0
+            where zero = replicate 5 0
+
+collectRequestDurations :: Input -> (LogLine -> Bool) -> [RequestDurationStat]
+collectRequestDurations rawLines p = map m $ filter f $ parseLogLines rawLines
+        where clientIp line = head $ split "," (S.unpack $ getRemoteAdress line)
+              m line        = if isCacheHit line
+                              then RequestDurationStat (getDate line) (clientIp line) (getRequestDuration line) 0
+                              else RequestDurationStat (getDate line) (clientIp line) 0 (getRequestDuration line)
+              f line        = isOutgoingLogLine line && p line
+
 
 analyseGitOperations' :: (LogDate -> LogDate -> Bool) -> (LogDate -> String) -> Input -> [GitOperationStats]
 analyseGitOperations' comp formatLogDate rawLines =

File logparser/src/Stash/Log/Parser.hs

 import Data.ByteString.Char8 (readInteger)
 import Data.String.Utils (split)
 import Data.Maybe (mapMaybe)
+import Text.Printf (printf)
 -- REMOTE_ADRESS | PROTOCOL | (o|i)REQUEST_ID | USERNAME | date |  URL | DETAILS | LABELS | TIME | SESSION_ID |
 -- REQUEST_ID -> MINUTE_OF_DAYxREQUEST_COUNTERxCONCURRENT_REQUESTS
 
     ,getAction              :: Action
     ,getDetails             :: S.ByteString
     ,getLabels              :: [String]
-    ,getRequestDuration     :: S.ByteString
+    ,getRequestDuration     :: Int
     ,getSessionId           :: S.ByteString
 } deriving (Show, Eq)
 
     ,getMinute      :: !Int
     ,getSeconds     :: !Int
     ,getMillis      :: !Int
-} deriving (Show, Eq)
+} deriving (Eq)
+
+instance Show LogDate where
+    show date = printf "%04d-%02d-%02d %02d:%02d:%02d" (getYear date)
+                                (getMonth date)
+                                (getDay date)
+                                (getHour date)
+                                (getMinute date)
+                                (getSeconds date)
 
 -- | Parse the input into a list of LogLines
 parseLogLines :: Input -> [LogLine]
     action <- parseAction
     details <- logEntry
     labels_ <- logEntry
-    duration <- logEntry
+    rawDuration <- logEntry
     sessionId <- logEntry
     let labels = map trim $ split "," (S.unpack labels_)
         username = if rawUsername == "-" then Nothing else Just rawUsername
+        duration = read $ S.unpack rawDuration
     return $ LogLine remoteAddress protocol requestId username date
                     action details labels duration sessionId
 

File regenerate-graphs.sh

 LOG_FILE=${1:-"../access-logs/aggregated-log-file"}
 
 time logparser gitOperations ${LOG_FILE} +RTS -sstderr > plot-git-ops
+time logparser requestDurations ${LOG_FILE} +RTS -sstderr > clone-duration
 gnuplot < gnuplot/generate-git-ops-plot.plot
 
 time logparser maxConn ${LOG_FILE} +RTS -sstderr  > plot-all