astrosearch / AstroSearch.hs

Full commit
{-# LANGUAGE OverloadedStrings #-}


  ./astrosearch <port number>

The search term is defined when you start the server with astroserver.


We flush stdout after most outputs to make sure it gets written to
any log file, for when run as a daemon. The expected tweet rate should
not be high enough to make this an issue (or to go to any fancier scheme).


module Main where

import qualified Data.Conduit.List as CL
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Text.IO as T

import Control.Exception (bracket)
import Control.Monad (forM_)
import Control.Monad.Trans.Class (lift)

import Data.Acid
import Data.Conduit (($$+-))
import Data.Time

import Network (PortNumber)

import System.Environment
import System.Exit (exitFailure)
import System.IO (hPutStrLn, hFlush, stderr, stdout)

import Common 
import AcidState

usage :: IO ()
usage = do
  progName <- getProgName
  hPutStrLn stderr $ "Usage: " ++ progName ++ " <port number>"
main :: IO ()
main = do
  args <- getArgs
  case args of
    [portStr] -> 
        case getPort portStr of
          Just port -> runSearch port
          _ -> usage
    _ -> usage

runSearch :: PortNumber -> IO ()
runSearch port = 
    (putStrLn ("Port: " ++ show port) >> hFlush stdout >> openStore port)
    (\acid -> getCurrentTime >>= 
              \stopTime -> update acid (AddSearchStop stopTime) >>
              closeAcidState acid)
    (\acid -> do
       terms <- query acid GetSearchTerms
       forM_ (S.toList terms) $ T.putStrLn . ("Search: " `T.append`)
       nt <- query acid GetNumberEvents
       putStrLn $ if nt == 0
	          then "There are no existing tweets."
	          else "There are " ++ show nt ++ " saved tweets."
       hFlush stdout

       startTime <- getCurrentTime
       _ <- update acid (AddSearchStart startTime)
       searchFor acid terms

TODO: deal with errors from the search / credential process
 - for example, have an exit with
     astrosearch: data: end of file
   which should really be caught
searchFor :: AcidState TweetStore -> S.Set T.Text -> IO ()
searchFor acid terms = do
  credFile <- credentialFile
  authorizeAction credFile $ \mgr cred -> do
    src <- streamSearch terms mgr cred
    src $$+- CL.mapM_ (lift . processTweet acid)

Given a response from the streaming search API of Twitter,
process it. If it is empty ("\r\n") then do nothing,
otherwise add it to the store and display part of it (we
hard code the section to select what, at present, is the
tweet time and the text, but this could change if Twitter
decides to re-order the JSON).

In earlier versions we just wrote out a summary of the tweet, which
is better, but this is faster.
processTweet :: AcidState TweetStore -> T.Text -> IO ()
processTweet acid txt | txt == "\r\n" = return ()
	              | otherwise     = update acid (AddTweetEvent txt) 
                                        >> T.putStrLn (summary txt)
                                        >> hFlush stdout

summary :: T.Text -> T.Text
-- summary = T.take 79
summary txt = 
    let t1 = T.drop 15 txt
        (date, t2) = T.splitAt 30 t1
        t3 = T.drop 64 t2
        tweet = T.take 140 t3
    in T.concat [date, " ", tweet]