Source

astrosearch / AstroQuery.hs

Full commit
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Main (main) where

import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Text.IO as T

import Data.Acid
import Data.Either (lefts, rights)

import Network

import System.Environment
import System.Exit (exitFailure)

import Control.Exception (bracket)
import Control.Monad (forM_, when)

import Web.Twitter.Types

import AcidState

usage :: IO ()
usage = do
  progName <- getProgName
  putStrLn $ "Usage: " ++ progName ++ " <port number> args.."
  putStrLn "  where args... is one of"
  putStrLn "    size         - report size of database"
  putStrLn "    terms        - what are the search terms"
  putStrLn "    info [n]     - report on the last n tweets (name,text,time,...)"
  putStrLn "    show [n]     - dump n latest tweets (user name + text)"
  putStrLn "    raw [n]      - dump n latest tweets (raw text)"
  putStrLn "    validate [n] - can last n tweets be converted?"
  putStrLn "    convert [n]  - convert to Haskell and dump output"
  putStrLn "    times        - display the search start/end times"
  putStrLn "    checkpoint   - create a checkpoint"
  exitFailure
  
main :: IO ()
main = do
  args <- getArgs
  case args of
    (port:xs) -> case getPort port of
      Just p -> argParse p xs
      _ -> usage
      
    _ -> usage
    
argParse :: PortNumber -> [String] -> IO ()    
argParse _ [] = usage
argParse port [arg] | arg == "size"       = reportSize port
                    | arg == "times"      = reportTimes port
                    | arg == "info"       = infoTweets Nothing port
                    | arg == "show"       = showTweets Nothing port
                    | arg == "raw"        = rawTweets Nothing port
                    | arg == "validate"   = validateTweets Nothing port
                    | arg == "convert"    = convertTweets Nothing port
                    | arg == "checkpoint" = checkPoint port
                    | arg == "terms"      = reportTerms port
                    | otherwise           = usage

argParse port (arg:nstr:[]) = 
    case maybeRead nstr of
      Nothing -> usage
      mn -> case arg of
              "info" -> infoTweets mn port
              "show" -> showTweets mn port
              "raw" -> rawTweets mn port
              "validate" -> validateTweets mn port
              "convert" -> convertTweets mn port
              _ -> usage


argParse _ _ = usage                                              
  
doAction :: (AcidState TweetStore -> IO ()) -> PortNumber -> IO ()
doAction act port =
    bracket
    (openStore port)
    closeAcidState
    act

processNTweets :: 
    ([T.Text] -> IO ())
    -> Int
    -> PortNumber
    -> IO ()
processNTweets f n = 
    doAction (\acid -> query acid (GetTweetEvents n) >>= f)

processAllTweets ::
    ([T.Text] -> IO ())
    -> PortNumber
    -> IO ()
processAllTweets f = 
    doAction (\acid -> query acid GetAllTweetEvents >>= f)

reportSize :: PortNumber -> IO ()                                      
reportSize = 
    doAction (\acid -> query acid GetNumberEvents >>= \n -> putStrLn ("Number of tweets: " ++ show n))

reportTerms :: PortNumber -> IO ()
reportTerms = 
    doAction (\acid -> query acid GetSearchTerms >>= mapM_ (T.putStrLn . ("Search term: " `T.append`)) . S.toList)

reportTimes :: PortNumber -> IO ()
reportTimes =
    doAction (\acid -> do
                ts <- query acid GetSearchTimes
                putStrLn "Search start/stop times"
                mapM_ printSearchTime ts
             )
  
printSearchTime :: SearchTime -> IO ()
printSearchTime (SearchStart t) = putStrLn $ "  start  " ++ show t
printSearchTime (SearchStop  t) = putStrLn $ "  stop   " ++ show t

checkPoint :: PortNumber -> IO ()
checkPoint port = do
  acid <- openStore port
  createCheckpoint acid
  closeAcidState acid
  putStrLn "Checkpoint created."

showItems :: [T.Text] -> IO ()
showItems [] = putStrLn "\nThere are no tweets.\n"
showItems xs = do
  let ntot = show $ length xs
  forM_ (zip [1..] xs) $ \(n::Int,x) -> putStrLn ("# [" ++ show n ++ "/" ++ ntot ++ "] ") >> T.putStrLn x

rawTweets :: Maybe Int -> PortNumber -> IO ()
rawTweets mn p = 
    case mn of
      Just n -> processNTweets showItems n p
      _ -> processAllTweets showItems p

validateTweets :: Maybe Int -> PortNumber -> IO ()
validateTweets mn port = 
    let f xs = do
          let ans = lefts $ map toTweet xs
              ntot = length xs
          case ans of
            [] -> putStrLn $ "All " ++ show ntot ++ " tweets are valid"
            _  -> do
                 putStrLn $ "# There are " ++ show (length ans) ++ " out of " ++ show ntot ++ " tweets that are invalid."
                 mapM_ (\(nn::Int,t) -> putStrLn (concat ["[", show nn, "] ", t])) $ zip [1..] ans

    in case mn of
         Just n -> processNTweets f n port
         _ -> processAllTweets f port

-- We just ignore invalid tweets for now
convertTweets :: Maybe Int -> PortNumber -> IO ()
convertTweets mn port = 
    let f xs = do
          let ans = rights $ map toTweet xs
              ntot = length xs
              nans = length ans
          when (nans < ntot) $ putStrLn $ "# There were " ++ show (ntot-nans) ++ " invalid tweets"
          mapM_ (\(nn::Int,t) -> putStrLn (concat ["[", show nn, "] ", show t])) $ zip [1..] ans

    in case mn of
         Just n -> processNTweets f n port
         _ -> processAllTweets f port

infoTweets :: Maybe Int -> PortNumber -> IO ()
infoTweets mn port =
    let f = mapM_ (T.putStrLn . tweetToInfo) . rights . map toTweet'
    in case mn of
         Just n -> processNTweets f n port
         _ -> processAllTweets f port
    
showTweets :: Maybe Int -> PortNumber -> IO ()
showTweets mn port =
    let f xs = do
          let ans = rights $ map toTweet' xs
              ntot = length xs
          case ans of
            [] -> putStrLn "No valid tweets found"
            _  -> do
                 putStrLn $ "# There are " ++ show (length ans) ++ " out of " ++ show ntot ++ " valid tweets."
                 mapM_ (putStrLn . (\(nn::Int,t) -> tweetToString nn t)) $ zip [1..] ans

    in case mn of
         Just n -> processNTweets f n port
         _ -> processAllTweets f port

-- simple dump of information
tweetToString :: Int -> StreamingAPI -> String
tweetToString n (SStatus s) = 
    let sn = userScreenName $ statusUser s
        txt = statusText s
    in concat ["#", show n, " ", T.unpack sn, ": ", T.unpack txt]
tweetToString n (SRetweetedStatus rs) = 
    let rtuser = userScreenName . rsUser $ rs
        s = rsRetweetedStatus rs
        sn = userScreenName $ statusUser s
        rtxt = rsText rs
    in concat ["#", show n, " (RT ", T.unpack rtuser, " of ", T.unpack sn, ") ", T.unpack rtxt]
tweetToString n (SDelete d) = concat ["#", show n, " DELETE: ", show d]
tweetToString n s = concat ["#", show n, " OTHER: ", show s]

-- more information than tweetToString
tweetToInfo :: StreamingAPI -> T.Text
tweetToInfo (SStatus Status {..}) = 
    T.intercalate "," 
         [ T.pack $ show statusId
         , T.pack $ show (userId statusUser)
         , userScreenName statusUser
         , T.pack $ drop 4 statusCreatedAt
         , "n/a"
         , T.replace "\n" "\\n" statusText
         ]
tweetToInfo (SRetweetedStatus RetweetedStatus {..}) = 
    T.intercalate "," 
         [ T.pack $ show rsId
         , T.pack $ show (userId rsUser)
         , userScreenName rsUser
         , T.pack $ drop 4 rsCreatedAt
         , T.pack $ show $ statusId rsRetweetedStatus
         , T.replace "\n" "\\n" rsText
         ]
tweetToInfo (SEvent _) = "# <event>"
tweetToInfo (SDelete _) = "# <deletion>"
tweetToInfo (SFriends _) = "# <friends>"
tweetToInfo (SUnknown _) = "# <unknown>"