Stefan Saasen avatar Stefan Saasen committed 76addb8

Clean up the existing commands and update the README file with examples

Comments (0)

Files changed (5)

 =================================
 
 The log parser parses and aggregates the access logs of the Atlassian Stash web
-application.
+application. The main focus is on analyzing the git operations, as they considerably dominate the overall performance of the application.
 
 Installation
 ------------
 
 
     [922] λ > logparser gitOperations atlassian-stash-access-2012-*.log.bz2
+    # Date | clone | fetch | shallow clone | push | ref advertisement ...
     2012-08-22 18|2|0|13|0|733|0|0|0|0|0|2|0|13|0|733
     2012-08-22 19|3|24|74|0|1660|0|0|0|0|0|3|24|74|0|1660
     2012-08-22 20|2|33|119|0|1369|0|0|0|0|0|2|33|119|0|1369
 
     $> ./regenerate-graphs.sh '/data/stash-access-log/atlassian-stash-access-2012-09*.log*'
 
+Available Commands
+==================
+
+The `gitOperations` command aggregates the number of `clone`, `fetch`, `shallow clone`, `push` and `ref advertisement` operations per hour. The example output is shown above.
+
+The `gitDurations` command shows the duration of git operations (for the same set of git operations mentioned above).
+The output of this command looks like this:
+
+    $> logparser gitDurations ../data/stash-prod-access-log/atlassian-stash-access-2012-12-10.0.log
+    # Date | Clone duration (cache hit) | Clone duration (cache miss) | Fetch (hit) | Fetch (miss) | Shallow Clone (hit) | Shallow Clone (miss) | Push (hit) | Push (miss) | Ref adv (hit) | Ref adv (miss) | Client IP | Username 
+    2012-12-10 00:00:00|0|1848|0|0|0|0|0|0|0|0|172.16.1.187|klaus tester
+    2012-12-10 00:00:00|0|0|0|435|0|0|0|0|0|0|63.246.22.196|bamboo_user
+    2012-12-10 00:00:00|0|0|0|0|0|0|0|0|287|0|63.246.22.196|bamboo_user
+
+The `protocolStats` command aggregates the number of git operations based on the access protocol (HTTP(S) vs. SSH):
+
+    $> logparser protocolStats ../data/stash-prod-access-log/atlassian-stash-access-2012-12-10.0.log 
+    # Date | SSH | HTTP(s)
+    2012-12-10 00|1107|52612
+    2012-12-10 01|651|48442
+    2012-12-10 02|523|42213
+
+The `countRequests` command shows the overall number of requests for the given log files.
+
+    $> logparser countRequests ../data/stash-prod-access-log/atlassian-stash-access-2012-12-10.0.log 
+    72773
+
 Access log format
 =================
 
-Fields:
+The access log contains rows with the following fields separated by ` | `:
 
 * IP address. If there are multiple addresses, the first address is the 'real' IP address and the remainder are the IPs of intermediary proxies
-* Protocol: http/https/ssh
-* Request id of the format: i6x3112x1, where:
+* Protocol: `http/https/ssh`
+* Request id of the format: `i6x3112x1`, where:
 	* i = start of the request, o = end of the request
 	* 6 = minute in day => 0:00:06
 	* x = separator
 * Username: only available on the end of the request
 * Date/Time
 * Action:
-	* for HTTP requests: "<http-method> <request-url> <http-version>"
+	* for HTTP requests: `<http-method> <request-url> <http-version>`
 	* for SSH commands: the ssh command-line
 * Request details:
-	* for HTTP: "<referrer-url>" "<user-agent>"
-	* for SSH: -
+	* for HTTP: `"<referrer-url>" "<user-agent>"`
+	* for SSH: `-`
 * Labels: used in the application to add 'classifications' to requests. Currently supported:
 	* type of hosting request: push | fetch | clone | shallow clone | refs
 	* clone cache: cache:hit | cache:miss

logparser/src/Main.hs

 
 
 count, countRequests, maxConn, summarizeGitOperations, requestDurations, summarizeProtocolStats, debugParser :: Command ()
-count = defCmd {
-                cmdName = "count",
-                cmdHandler = commandHandler $ printCountLines countLines,
-                cmdCategory = "Logfile analysis",
-                cmdShortDesc = "Count the number of lines in the given logfile"
-        }
 
 countRequests = defCmd {
                 cmdName = "countRequests",
                 cmdHandler = commandHandler $ parseAndPrint countRequestLines,
                 cmdCategory = "Logfile analysis",
-                cmdShortDesc = "Count the number requests"
+                cmdShortDesc = "Count the number of requests"
         }
 
 maxConn = defCmd {
         }
 
 requestDurations = defCmd {
-                cmdName = "requestDurations",
-                cmdHandler = commandHandler $ printCloneRequestDurations cloneRequestDuration,
+                cmdName = "gitDurations",
+                cmdHandler = commandHandler $ printGitRequestDurations gitRequestDuration,
                 cmdCategory = "Logfile analysis",
-                cmdShortDesc = "Show the duration of clone operations over time"
+                cmdShortDesc = "Show the duration of git operations over time"
         }
 
 summarizeProtocolStats = defCmd {
                 cmdShortDesc = "Parse and print the first five lines of the log file"
         }
 
+count = defCmd {
+                cmdName = "count",
+                cmdHandler = commandHandler $ printCountLines countLines,
+                cmdCategory = "Debug",
+                cmdShortDesc = "Count the number of lines in the given logfile(s)"
+        }
+
 commandHandler f = do
     args <- appArgs
     case args of

logparser/src/Stash/Log/GitOpsAnalyser.hs

 ( GitOperationStats(..)
 , analyseGitOperations
 , RequestDurationStat(..)
-, cloneRequestDuration
+, gitRequestDuration
 , isRefAdvertisement
 , protocolCount
 ) where
     in analyseGitOperations' logDateEqHour formatLogDate rawLines
 
 -- | Return the duration of git operations (clone, fetch, shallow clone, push and ref advertisement)
-cloneRequestDuration :: Input -> [RequestDurationStat]
-cloneRequestDuration rawLines = collectRequestDurations rawLines authenticatedGitOp
+gitRequestDuration :: Input -> [RequestDurationStat]
+gitRequestDuration rawLines = collectRequestDurations rawLines authenticatedGitOp
 
 
 -- =================================================================================

logparser/src/Stash/Log/Output.hs

 
 module Stash.Log.Output
 ( printProtocolData
-, printCloneRequestDurations
+, printGitRequestDurations
 , printPlotDataConcurrentConn
 , printPlotDataGitOps
 , parseAndPrint
         printf "# Date | Max concurrent connection\n"
         mapM_ (\pd -> printf "%s|%d\n" (formatLogDate $ getLogDate pd) (getValue pd)) plotData
 
-printCloneRequestDurations :: (Input -> [RequestDurationStat]) -> [FilePath] -> IO ()
-printCloneRequestDurations g path = do
+printGitRequestDurations :: (Input -> [RequestDurationStat]) -> [FilePath] -> IO ()
+printGitRequestDurations g path = do
         plotData <- liftM g $ readLogFiles "printCloneRequestDurations" path
         printf "# Date | Clone duration (cache hit) | Clone duration (cache miss) | Fetch (hit) | Fetch (miss) | Shallow Clone (hit) | Shallow Clone (miss) | Push (hit) | Push (miss) | Ref adv (hit) | Ref adv (miss) | Client IP | Username \n"
         mapM_ (\(RequestDurationStat date clientIp [cm,fm,sm,pm,rm] [c,f,s,p,r] username)

regenerate-graphs.sh

 time logparser gitOperations ${LOG_FILE} +RTS -sstderr > plot-git-ops
 gnuplot < gnuplot/generate-git-ops-plot.plot
 
-time logparser requestDurations ${LOG_FILE} +RTS -sstderr > clone-duration
+time logparser gitDurations ${LOG_FILE} +RTS -sstderr > clone-duration
 gnuplot < gnuplot/generate-git-durations.plot
 
 time logparser maxConn ${LOG_FILE} +RTS -sstderr > plot-all
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.