Commits

Doug Burke committed ae38543

ToRDF: can now read JSON input from a text file

Comments (0)

Files changed (2)

 
 module Convert ( TweetData
                , toTweetData
+               , parseTweetData
                , processTweets
                , processLinks
                , filterOnText
                ) where
 
 import qualified Data.Acid as A
+import qualified Data.Aeson as Aeson
+import qualified Data.Aeson.Types as Aeson
 
 import qualified Control.Exception as CE
 
--- import qualified Data.ByteString as B
 import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Lazy as LB
 import qualified Data.Conduit as C
 import qualified Data.Map as M
 import qualified Data.Set as S
 import qualified Network.TLS as TLS
 import qualified Network.URI as N
 
-import Control.Applicative (Applicative(..), (<$>))
-import Control.Monad (join, void)
+import Control.Applicative (Applicative(..), (<$>), (<|>))
+import Control.Monad (join, mzero, void)
 
 import Data.Acid hiding (Query)
 -- import Data.Char (isDigit, isHexDigit, chr, digitToInt)
 
 data TweetData = TDS Status | TDR RetweetedStatus
 
+-- | Parse a JSON document (e.g. the Twitter search output dumped
+--   by @astroquery json@) into the tweets it contains.
+--
+--   The input must decode to a JSON object; the tweets are read
+--   from its @statuses@ field. The result is 'Nothing' if either
+--   step fails, so no information on the cause of a failure is
+--   available.
+parseTweetData :: LB.ByteString -> Maybe [TweetData]
+parseTweetData js = do
+  obj <- Aeson.decode' js
+  Aeson.parseMaybe (Aeson..: "statuses") obj
+
+-- | A JSON object is tried first as a 'Status' and then as a
+--   'RetweetedStatus'; any other JSON value is rejected.
+instance Aeson.FromJSON TweetData where
+  parseJSON v = case v of
+    Aeson.Object _ ->
+      fmap TDS (Aeson.parseJSON v)
+      <|> fmap TDR (Aeson.parseJSON v)
+      <|> mzero
+    _ -> mzero
+
 toTweetData :: StreamingAPI -> Maybe TweetData
 toTweetData (SStatus s)          = Just (TDS s)
 toTweetData (SRetweetedStatus r) = Just (TDR r)
 
 Usage:
 
-  ./tordf <port | local>
+  ./tordf <port | local | filename>
   ./tordf <port | local> <n>
   ./tordf <port | local> <s> <n>
 
 is output instead. You must supply the graph name in this case
 and this can only be done with a local connection.
 
+The filename option is intended for testing; the file should
+contain the JSON returned by the Twitter query; e.g. the output
+of 'astroquery ... json'.
+
 TODO:
   how best to record tw:numFollowers since may want to track
     changes over time
 import qualified Data.Set as S
 import qualified Data.Text as T
 import qualified Data.Text.IO as T
+import qualified Data.Text.Lazy as LT
+import qualified Data.Text.Lazy.Encoding as TE
 import qualified Network.URI as N
 
 import Control.Applicative ((<$>), (<*>))
-import Control.Monad (when)
+import Control.Monad (liftM, when)
 
 import Data.Acid
 import Data.Either (rights)
 import System.IO (hPutStrLn, stderr)
 
 import AcidState
-import Convert (TweetData, toTweetData, filterOnText, processTweets, createSearchMetadata)
+import Convert (TweetData, toTweetData, parseTweetData, filterOnText, processTweets, createSearchMetadata)
 import RDFUtils (fromStreaming)
 
 -- | We can now assume that toTweet' will work on all the responses
     (_, Just n) -> query acid (GetTweetEvents n)
     _ -> query acid GetAllTweetEvents
 
+-- | Convert the JSON output from astroquery into tweets.
+--
+--   The text is UTF-8 encoded and handed to 'parseTweetData'.
+--   Unlike the per-tweet versions, this errors out (via 'error')
+--   if the input as a whole cannot be decoded, which includes
+--   the case where /any/ tweet cannot be converted.
+convertJSON ::
+  T.Text
+  -> [TweetData]
+convertJSON txt =
+  case parseTweetData (TE.encodeUtf8 (LT.fromStrict txt)) of
+    Just tweets -> tweets
+    Nothing -> error "Unable to decode"
+
 -- | Create a graph containing metadata about the search
 createMetadata ::
   TweetConnection
   let header = "Usage: " ++ pName
       spacer = replicate (length header) ' '
   hPutStrLn stderr $ header ++ " <astrosearch port num | local> [[<starting tweet>] <num tweets>]"
+  hPutStrLn stderr $ header ++ " <filename>"
   hPutStrLn stderr $ spacer ++ "   [--filter <term>]"
   hPutStrLn stderr $ header ++ " local metadata <graph name>"
   hPutStrLn stderr $ "\n  - " ++ emsg
             Left emsg -> usage emsg
             Right (ms, mn, mt) -> getTweets conn ms mn mt >>= processTweets fromStreaming
             
-        _ -> usage ("Not a port number or the string local: " ++ connStr)
+        _ -> if null xs
+             then convertJSON `liftM` T.readFile connStr >>= processTweets fromStreaming
+             else usage ("Not a port number, the string local, or a file name: " ++ connStr)
 
-    [] -> usage "Missing connection (local or a port number)"
+    [] -> usage "Missing connection (local, port number, or file name)"