Commits

Stefan Saasen  committed 6bb179d

Add accumulated clone statistics that show the number of clones per repository.

The logparser output returns the number of clones per repository for the given
set of log files. The gnuplot script only plots the first 30 repositories.

  • Participants
  • Parent commits 686ef9a

Comments (0)

Files changed (7)

File gnuplot/repository-stats.plot

+# Plot the number of clones per repository from "repository-stats.dat".
+# The data file is expected to contain "name|count" rows.
+set datafile separator "|"
+set terminal png size 1400,1000
+
+set xlabel "Repository"
+
+set grid
+set output "repository-stats.png"
+set auto x
+set ylabel "Repository - Clone statistics"
+set title "Number of clones per repository for the whole timeframe"
+
+# Rotate the small-font tic labels so that long repository names stay readable.
+set xtic out nomirror rotate by -45 font ",8"
+
+# "every ::::29" stops at data point 29, i.e. only the first 30 rows are plotted.
+# Column 2 supplies the y value, column 1 the x tic label.
+plot "repository-stats.dat" every ::::29 using 2:xticlabels(1) with lines title "Number of clones"
+
+

File logparser/logparser.cabal

                             cmdargs >= 0.10,
                             unordered-containers >= 0.2,
                             time,
+                            zip-conduit,
+                            HaXml,
                             directory,
                             aeson
     ghc-options:

File logparser/src/Main.hs

 
 import System.Environment (getArgs, withArgs)
 import Stash.Log.Analyser hiding (ProtocolStats)
-import Stash.Log.GitOpsAnalyser
+import qualified Stash.Log.GitOpsAnalyser as G
 import Stash.Log.Output
 import Stash.Log.Input
 import Control.Monad (liftM)
 appShortDesc :: String
 appShortDesc = "Logparser for the Atlassian Stash access logs"
 
-data LogParser = MaxConn        {files :: [FilePath]}
-                | CountRequests {files :: [FilePath]}
-                | GitOperations {files :: [FilePath], progressive :: Bool}
-                | GitDurations  {files :: [FilePath], progressive :: Bool}
-                | ProtocolStats {files :: [FilePath]}
-                | Count         {files :: [FilePath]}
-                | DebugParser   {files :: [FilePath], progressive :: Bool}
+data LogParser = MaxConn            {files :: [FilePath]}
+                | CountRequests     {files :: [FilePath]}
+                | GitOperations     {files :: [FilePath], progressive :: Bool}
+                | GitDurations      {files :: [FilePath], progressive :: Bool}
+                | ProtocolStats     {files :: [FilePath]}
+                | RepositoryStats   {files :: [FilePath]}
+                | Count             {files :: [FilePath]}
+                | DebugParser       {files :: [FilePath], progressive :: Bool}
              deriving (Data,Typeable,Show,Eq)
 
 progressiveFlags :: Bool
 protocolStats   = ProtocolStats {files = def &= args}
                 &= name "protocolStats" &= help "Aggregate the number of git operations per hour based on the access protocol (http(s) vs. SSH)"
 
+-- | Command line mode @repositoryStats@. The log files are taken from the
+-- positional arguments and stored in the 'files' field of 'RepositoryStats'.
+repositoryStats :: LogParser
+repositoryStats = RepositoryStats {files = def &= args}
+                &= name "repositoryStats" &= help "Show the number of git clone \
+                    \operations per repository"
+
 count :: LogParser
 count           = Count {files = def &= args}
                 &= name "count"         &= help "Count the number of lines in the given logfile(s)"
 
 
 mode :: Mode (CmdArgs LogParser)
-mode = cmdArgsMode $ modes [maxConn, countRequests, gitOperations, gitDurations, protocolStats, count, debugParser]
+mode = cmdArgsMode $ modes [maxConn, countRequests, gitOperations, gitDurations, 
+                            protocolStats, repositoryStats, count, debugParser]
         &= help appShortDesc
         &= program appName &= summary (appName ++ " " ++ appVersion)
         &= verbosity
 run :: LogParser -> IO ()
 run (MaxConn files')                     = stream concurrentConnections printPlotDataConcurrentConn newRunConfig "printPlotDataConcurrentConn" files'
 run (CountRequests files')               = stream countRequestLines print newRunConfig "countRequestLines" files'
-run (GitOperations files' progressive')  = stream analyseGitOperations printPlotDataGitOps (RunConfig progressive') "printPlotDataGitOps" files'
-run (GitDurations files' progressive')   = stream gitRequestDuration printGitRequestDurations (RunConfig progressive') "gitRequestDuration" files'
+run (GitOperations files' progressive')  = stream G.analyseGitOperations printPlotDataGitOps (RunConfig progressive') "printPlotDataGitOps" files'
+run (GitDurations files' progressive')   = stream G.gitRequestDuration printGitRequestDurations (RunConfig progressive') "gitRequestDuration" files'
 run (ProtocolStats files')               = stream protocolStatsByHour printProtocolData newRunConfig "printProtocolData" files'
+run (RepositoryStats files')             = stream G.repositoryStats printRepoStatsData newRunConfig "printRepoStatsData" files'
 run (Count files')                       = printCountLines countLines files'
 run (DebugParser files' progressive')    = stream showLines print (RunConfig progressive') "showLines" files'
 

File logparser/src/Stash/Log/GitOpsAnalyser.hs

 ( GitOperationStats(..)
 , analyseGitOperations
 , RequestDurationStat(..)
+, RepositoryStat(..)
 , gitRequestDuration
 , isRefAdvertisement
 , protocolCount
+, repositoryStats
 ) where
 
 import qualified Data.ByteString.Char8 as S
 import qualified Data.HashMap.Strict as M
 import Data.String.Utils (split)
-import Data.List (foldl', groupBy)
+import Data.List (foldl', groupBy, sortBy)
 import Data.Maybe (isJust, mapMaybe, fromMaybe)
 import Data.Function (on)
 import Text.Printf (printf)
 gitRequestDuration :: Input -> [RequestDurationStat]
 gitRequestDuration rawLines = collectRequestDurations rawLines authenticatedGitOp
 
+
+-- | Clone statistics for a single repository.
+--
+-- NOTE: 'getName' and 'getNumberOfClones' are partial record accessors; they
+-- fail when applied to 'StatUnavailable'.
+data RepositoryStat = RepositoryStat {
+    getName             :: S.ByteString -- ^ Name under which the repository is reported
+  , getNumberOfClones   :: Int          -- ^ Number of clone operations counted for it
+} | StatUnavailable deriving (Show)     -- 'StatUnavailable': no statistics available
+
+-- | Count the clone operations per repository.
+--
+-- Only log lines that satisfy both 'isGitOperation' and 'isClone' are counted.
+-- Lines are keyed by the slug returned by 'extractRepoSlug'; lines without a
+-- slug are reported under the name @"n/a"@.
+--
+-- The result is ordered by descending clone count. Repositories with the same
+-- count are ordered by ascending slug, so the output is deterministic.
+--
+-- The counts are accumulated in a strict hash map in a single pass, so the
+-- parsed log lines are neither sorted nor grouped and the slug is extracted
+-- only once per line.
+repositoryStats :: Input -> [RepositoryStat]
+repositoryStats xs =
+     let clones = filter (\l -> isGitOperation l && isClone l) $ parseLogLines xs
+         counts = foldl' countClone M.empty clones
+     in  map toStat $ sortBy byCountThenSlug $ M.toList counts
+     where countClone acc l = M.insertWith (+) (extractRepoSlug $ getAction l) (1 :: Int) acc
+           -- Descending by count, ascending by slug for equal counts.
+           byCountThenSlug (slugA, numA) (slugB, numB) = compare (numB, slugA) (numA, slugB)
+           toStat (slug, num) = RepositoryStat (S.pack $ fromMaybe "n/a" slug) num
+
+
 
 -- =================================================================================
 

File logparser/src/Stash/Log/Output.hs

 , printPlotDataConcurrentConn
 , printPlotDataGitOps
 , printCountLines
+, printRepoStatsData
 ) where
 
 
         mapM_ (\(RequestDurationStat date clientIp [cm,fm,sm,pm,rm] [c,f,s,p,r] username)
                 -> printf "%s|%d|%d|%d|%d|%d|%d|%d|%d|%d|%d|%s|%s\n" (show date) c cm f fm s sm p pm r rm clientIp (S.unpack username)) plotData
 
+-- | Print the clone statistics as @name|count@ rows, preceded by a
+-- @#@-prefixed header line. 'StatUnavailable' entries are printed as @N/A|0@.
+printRepoStatsData :: [RepositoryStat] -> IO ()
+printRepoStatsData stats = printf "# Repository name | Number of clones \n" >> mapM_ printRow stats
+    where printRow stat = case stat of
+              RepositoryStat repoName clones -> printf "%s|%d\n" (S.unpack repoName) clones
+              _                              -> printf "N/A|0\n"
+
 printCountLines :: (Show a) => (L.ByteString -> a) -> [FilePath] -> IO ()
 printCountLines f path = print . f =<< readFiles path
 

File logparser/src/Stash/Log/Parser.hs

-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, RecordWildCards #-}
 
 module Stash.Log.Parser
 ( Action(..)
 , Input
 , parseLogLine
 , parseLogLines
+, extractRepoSlug
 , isIncoming
 , isOutgoing
 , isOutgoingLogLine
 import qualified Data.ByteString.Char8 as S
 import qualified Data.ByteString.Lazy.Char8 as L
 import qualified Data.Text as T
-import Data.Attoparsec.Char8 hiding (char, space, take)
-import Prelude hiding (takeWhile)
+import Data.Attoparsec.Char8 hiding (char, space, take, takeWhile)
 import Data.ByteString.Char8 (readInteger, readInt)
-import Data.String.Utils (split)
+import qualified Data.String.Utils as UT
 import Data.Maybe (mapMaybe)
+import Data.List (isPrefixOf)
 import Text.Printf (printf)
 
 type Input = [L.ByteString]
 parseAction :: Parser Action
 parseAction = choice [parseSshAction, parseHttpAction]
 
+
+-- | Return the repo slug from the logged action.
+--
+-- E.g. for "GET /scm/CONF/confluence.git/info/refs HTTP/1.1" this would return:
+--      "/CONF/confluence.git"
+--
+-- Leading empty, @scm@ and @git@ path segments are dropped. The slug ends
+-- before the first @info@ segment or git service segment (@git-upload-pack@,
+-- @git-receive-pack@, @git-upload-archive@). Project or repository names that
+-- merely start with @git@ (e.g. @gitolite.git@) are kept intact.
+--
+-- Returns 'Nothing' if no path segment is left for the slug.
+extractRepoSlug :: Action -> Maybe String
+extractRepoSlug Action{..}
+    | null slugElems = Nothing
+    | otherwise      = Just $ '/' : UT.join "/" slugElems
+    where elems     = UT.split ("/" :: String) (S.unpack getPath)
+          slugElems = takeWhile (not . isTerminator) $ dropWhile (`elem` ["", "scm", "git"]) elems
+          -- A segment that marks the end of the repository part of the path.
+          isTerminator :: String -> Bool
+          isTerminator s = s == "info" || any (`isPrefixOf` s) gitServices
+          gitServices :: [String]
+          gitServices = ["git-upload-pack", "git-receive-pack", "git-upload-archive"]
+
+
 parseSshAction :: Parser Action
 parseSshAction = do
     method <- takeTill (== '\'')
     labels_ <- logEntry
     duration <- parseDuration
     sessionId <- logEntry
-    let labels = map trim $ split "," (S.unpack labels_)
+    let labels = map trim $ UT.split "," (S.unpack labels_)
         username = if rawUsername == "-" then Nothing else Just rawUsername
     return $ LogLine remoteAddress protocol requestId username date
                     action details labels duration sessionId

File logparser/tests/Properties.hs

 parsedLogLine5 = parseLogLine inputLine
     where inputLine = "172.16.3.7 | ssh | o357x407998x2 | atlaseye_user | 2013-03-05 05:57:20,505 | SSH - git-upload-pack '/CONF/teamcal.git' | - | clone | 145 | ofq0l6 | "
 
+-- | Parse result for an https log line whose action is a
+-- @POST .../git-upload-pack@ request labelled @clone, cache:hit@.
+parsedLogLine6 = parseLogLine inputLine
+    where inputLine = "172.26.24.201,127.0.0.1 | https | o1167x35420x10 | klaus.tester | 2013-06-13 19:27:27,302 \
+    \| \"POST /scm/testlab/point-jmeter.git/git-upload-pack HTTP/1.1\" | \"\" \"git/1.7.8.2\" | clone, cache:hit | 3499 | 8wovkm | "
+
 test_parseLogEntryDate = H.assertEqual
     "Should parse the date correctly"
     (LogDate 2012 8 22 18 32 08 505)
     "/CONF/teamcal.git"
     (getPath $ getAction $ fromJust parsedLogLine3)
 
+-- | 'extractRepoSlug' applied to the action of @parsedLogLine3@ must yield
+-- the unchanged path @/CONF/teamcal.git@.
+test_extractActionSsh = H.assertEqual
+    "Should parse the action correctly for ssh"
+    (Just "/CONF/teamcal.git")
+    (extractRepoSlug $ getAction $ fromJust parsedLogLine3)
+
+-- | 'extractRepoSlug' applied to the action of @parsedLogLine@ must yield
+-- @/ATLASSIAN/jira.git@.
+-- NOTE(review): @parsedLogLine@ is defined outside this hunk; presumably an
+-- http(s) request line - confirm against the fixture.
+test_extractActionHTTP = H.assertEqual
+    "Should parse the action correctly for http"
+    (Just "/ATLASSIAN/jira.git")
+    (extractRepoSlug $ getAction $ fromJust parsedLogLine)
+
+-- | The trailing @git-upload-pack@ path segment of 'parsedLogLine6' must not
+-- be part of the extracted slug.
+test_extractActionHTTPUploadPack = H.assertEqual
+    "Should parse the action correctly for an upload-pack operation via http"
+    (Just "/testlab/point-jmeter.git")
+    (extractRepoSlug $ getAction $ fromJust parsedLogLine6)
+
 test_classifyRefAdv = H.assertBool
     "Should identify ref advertisement"
     (isRefAdvertisement $ fromJust parsedLogLine)
         ,testCase "parser/parse log entry date" test_parseLogEntryDate
         ,testCase "parser/parse username (Just)" test_logLineParseUsernameAsJust
         ,testCase "parser/parse username (Nothing)" test_logLineParseUsernameAsNothing
+        ,testCase "parser/extract Action HTTP" test_extractActionHTTP
+        ,testCase "parser/extract Action HTTP (upload-pack)" test_extractActionHTTPUploadPack
+        ,testCase "parser/extract Action SSH" test_extractActionSsh
       ]
     ]