Commits

Stefan Saasen committed 23292ae

Filter log files based on date

Comments (0)

Files changed (4)

logparser/logparser.cabal

                             text,
                             unordered-containers,
                             aeson,
+                            bzlib,
                             QuickCheck >= 2.4.0.1
 
 executable logparser

logparser/src/Main.hs

 {-# LANGUAGE OverloadedStrings #-}
 module Main where
 
-import qualified Data.ByteString.Lazy.Char8 as L
-import qualified Data.ByteString.Char8 as S
-import qualified Data.Map as M
-import Stash.Log.Parser
 import Stash.Log.Analyser
 import Stash.Log.GitOpsAnalyser
 import Stash.Log.Output
-import Stash.Log.File (sortLogFiles, toLines, readFiles)
 import Data.Default
-import Data.List (isSuffixOf)
-import Data.Maybe (maybe)
-import Data.Aeson
 import UI.Command
 import Prelude hiding (takeWhile)
-import Control.Monad (liftM)
 import Control.Monad.Trans (liftIO)
 
 -- =================================================================================

logparser/src/Stash/Log/File.hs

 ( sortLogFiles
 , toLines
 , readFiles
+, FileInfo(..)
+, extractFileInfo
+, isFileNewer
 ) where
 
 import qualified Data.ByteString.Lazy.Char8 as L
 import qualified Codec.Compression.BZip as BZip
-import Data.List (sortBy)
 import Data.Monoid (mappend)
-import Data.List (isSuffixOf)
+import Data.List (isSuffixOf, sortBy)
 import Data.String.Utils (split)
 import System.Path.NameManip
 import Control.Monad (liftM)
+import Data.Maybe (fromMaybe, maybe)
+import Debug.Trace
+
+-- | Date and sequence number parsed from the name of an access log file.
+-- The derived 'Ord' instance compares the fields in declaration order
+-- (year, month, day, counter); the date parts are compared as strings.
+data FileInfo = FileInfo {
+     year       :: String -- ^ year component of the file name
+    ,month      :: String -- ^ month component of the file name
+    ,day        :: String -- ^ day component of the file name
+    ,counter    :: Int    -- ^ sequence number that follows the date
+} deriving (Show, Eq, Ord)
+
+-- | A date given as text; 'isFileNewer' splits it on "-" into year, month and day.
+type Date = String
+
+-- | Check whether the log file is dated on or after the given date. This is
+-- solely based on the date that is part of the filename.
+--
+-- The date is split on "-" into year, month and day, and compared (as
+-- strings, with sequence number 0) against the 'FileInfo' extracted from the
+-- file name, so a file carrying the same date counts as newer.
+--
+-- * A file whose name cannot be parsed is never considered newer.
+-- * A date with fewer than three components gives no cut-off to compare
+--   against; every file is then accepted instead of failing with a pattern
+--   match error.
+isFileNewer :: FilePath -> Date -> Bool
+isFileNewer file date =
+        case split "-" date of
+            (y:m:d:_) -> Just (FileInfo y m d 0) <= extractFileInfo file
+            _         -> True
 
 -- | Sort the logfiles by date and log file sequence number
 -- The logfile naming scheme is: "atlassian-stash-access-2012-11-29.0.log(.bz2)"
 sortLogFiles :: [FilePath] -> [FilePath]
 sortLogFiles = sortBy logFilePred
-    where extractFile = last . slice_path
-          sortPred (date1, num1) (date2, num2) = compare date1 date2 `mappend` compare num1 num2
+    where sortPred (date1, num1) (date2, num2) = compare date1 date2 `mappend` compare num1 num2
           logFilePred logFileName1 logFileName2 = sortPred (extractSortPairs logFileName1) (extractSortPairs logFileName2)
-          extractSortPairs path = let elems = drop 3 $ split "-" $ extractFile path
-                                  in case elems of
-                                     (year:month:(rest:_)) -> case split "." rest of
-                                                             (day:num:_) -> (year ++ "-" ++ month ++ "-" ++ day, read num :: Int)
-                                                             _           -> ("", 0)
-                                     _                 -> ("9999", 0)
+          -- File names that cannot be parsed get the key ("9999", 0).
+          extractSortPairs path = maybe ("9999", 0) asPair $ extractFileInfo path
+          -- FileInfo has a single constructor, so this one equation is exhaustive.
+          asPair (FileInfo y m d n) = (y ++ "-" ++ m ++ "-" ++ d, n)
 
+-- | Try to extract the 'FileInfo' out of the given file. This function assumes
+-- that the given file follows the naming scheme for the access log archive
+-- files.
+--
+-- The first three "-"-separated parts of the file name are skipped; the next
+-- two are taken as year and month, and the part after that is split on "."
+-- into the day and the sequence number. 'Nothing' is returned when the name
+-- has too few parts or when the sequence number is not an integer.
+extractFileInfo :: FilePath -> Maybe FileInfo
+extractFileInfo path =
+    case drop 3 $ split "-" $ extractFile path of
+        (y:m:rest:_) -> case split "." rest of
+                            (d:num:_) -> FileInfo y m d `fmap` parseCounter num
+                            _         -> Nothing
+        _            -> Nothing
+    where -- 'reads' instead of 'read': a non-numeric sequence number yields
+          -- Nothing rather than an exception once the counter is forced.
+          parseCounter :: String -> Maybe Int
+          parseCounter s = case reads s :: [(Int, String)] of
+                                [(n, "")] -> Just n
+                                _         -> Nothing
 
+-- | Read the given files and split their combined contents into lines. Only
+-- files accepted by the predicate (FilePath -> Bool) are read.
 toLines :: (FilePath -> Bool) -> [FilePath] -> IO [L.ByteString]
 toLines p files = fmap L.lines (readFiles p files)
 
+-- | Read the list of files and turn them into a lazy ByteString. The input files will be
+-- filtered using the function (FilePath -> Bool)
+--
+-- The files are sorted with 'sortLogFiles' before filtering, so the contents
+-- are concatenated in that order.
 readFiles :: (FilePath -> Bool) -> [FilePath] -> IO L.ByteString
-readFiles f files = fmap L.concat . mapM readCompressedOrUncompressed . filter f $ sortLogFiles files
+readFiles f files = fmap L.concat . mapM readCompressedOrUncompressed $ filteredFiles
+            where filteredFiles = filter f $ sortLogFiles files
+
+-- =================================================================================
+
+-- | Return the last component of the given path as produced by 'slice_path'.
+-- When 'slice_path' yields no components the empty string is returned
+-- instead of failing on 'last' of an empty list.
+extractFile :: FilePath -> String
+extractFile path = case slice_path path of
+                        []         -> ""
+                        components -> last components
 
 readCompressedOrUncompressed :: FilePath -> IO L.ByteString
 readCompressedOrUncompressed path = if ".bz2" `isSuffixOf` path

logparser/src/Stash/Log/Output.hs

 import Stash.Log.GitOpsAnalyser
 import Stash.Log.File
 import Text.Printf (printf)
-import Data.Aeson
-
-
-readConfig :: String -> IO (Maybe String)
-readConfig key = do
-        json <- L.readFile "logparser.state"
-        return $ (decode json :: Maybe (M.Map String String)) >>= M.lookup key
-
-readLogFiles :: String -> [FilePath] -> IO [L.ByteString]
-readLogFiles key path = do
-        date <- readConfig key
-        toLines (createPredicate date) path
-        where createPredicate maybeDate = maybe (\_ -> True) (\date -> (\file -> True)) maybeDate
+import Data.Aeson (decode)
 
 
 generateProtocolData :: (Input -> [ProtocolStats]) -> [FilePath] -> IO ()
                 -> printf "%s|%d|%d|%d|%d|%d|%d|%d|%d|%d|%d|%s|%s\n" (show date) c cm f fm s sm p pm r rm clientIp (S.unpack username)) plotData
 
 parseAndPrint :: (Show a) => (Input -> a) -> [FilePath] -> IO ()
-parseAndPrint f path = print . f . L.lines =<< readFiles (\x -> True) path
+parseAndPrint f path = print . f . L.lines =<< readFiles (const True) path
 
 printCountLines :: (Show a) => (L.ByteString -> a) -> [FilePath] -> IO ()
-printCountLines f path = print . f =<< readFiles (\x -> True) path
+printCountLines f path = print . f =<< readFiles (const True) path
 
 formatLogDate :: LogDate -> String
 formatLogDate date = printf "%04d-%02d-%02d %02d:%02d" (getYear date) (getMonth date)
                             (getDay date) (getHour date) (getMinute date)
+
+-- =================================================================================
+
+-- | Look up @key@ in the JSON object stored in the file "logparser.state".
+-- Yields 'Nothing' when the contents do not decode to a map of strings or
+-- when the key is absent.
+readConfig :: String -> IO (Maybe String)
+readConfig key = fmap lookupKey (L.readFile "logparser.state")
+        where lookupKey json = (decode json :: Maybe (M.Map String String)) >>= M.lookup key
+
+-- | Read the lines of the given log files, restricted by the date stored
+-- under @key@ in the configuration: with a stored date only files accepted
+-- by 'isFileNewer' are read, without one every file is read.
+readLogFiles :: String -> [FilePath] -> IO [L.ByteString]
+readLogFiles key path = readConfig key >>= \date -> toLines (newerThan date) path
+        where newerThan = maybe (const True) (flip isFileNewer)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.