1. Doug Burke
  2. astrosearch

Source

astrosearch / AcidState.hs

{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}

{- # LANGUAGE FlexibleInstances #-}
{- # LANGUAGE StandaloneDeriving #-}
{- # LANGUAGE RecordWildCards #-}

module AcidState 
       (
        -- ^ Store the results from the Streaming API
         TweetStore(..)
       , SearchTime(..)
       , emptyTweetStore
         
       , AddTweetEvent(..)
       , AddSearchStart(..)
       , AddSearchStop(..)
       , GetNumberEvents(..) 
       , GetTweetEvents(..)
       , GetAllTweetEvents(..)
       , GetSearchTerms(..)
       , GetSearchTimes(..)
                                           
        -- ^ Store results used by SimpleStats
       , SimpleStatsStore(..)
       , URIInfo(..)
       , emptySimpleStatsStore

       , AddOEmbed(..)
       , HaveOEmbed(..)
       , GetOEmbed(..)
       , AddURL(..)
       , HaveURL(..)
       , GetURL(..)

         -- ^ Utility routines
       , getPort
       , openStore
       , maybeRead

       , toTweet
       , toTweet'
       ) where

import qualified Data.Aeson as A
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Encoding as TE
import qualified Network.URI as N

import Control.Monad (liftM)
import Control.Monad.Reader (ask)                  
import Control.Monad.State (get, put)

import Data.Acid
import Data.Acid.Remote
import Data.Maybe (isJust, listToMaybe)
import Data.Ord (comparing)
import Data.Time
import Data.Typeable
import Data.SafeCopy

import Network

import Web.Twitter.Types

-- | A very simple model for search data from Twitter
data TweetStore0 = TweetStore0
    T.Text     -- ^ search term
    !Int       -- ^ the number of tweets
    [T.Text]   -- ^ non-empty responses from Twitter in (roughly)
               --   reverse time order (we append new entries to the
               --   start)
    [SearchTime] -- ^ when did the search start/stop
   deriving (Typeable)

data TweetStore = TweetStore
    (S.Set T.Text) -- ^ search terms (viewed as OR by Twitter)
    !Int         -- ^ the number of tweets
    [T.Text]     -- ^ non-empty responses from Twitter in (roughly)
                 --   reverse time order (we append new entries to the
                 --   start)
    [SearchTime] -- ^ when did the search start/stop
   deriving (Typeable)

data SearchTime =
                SearchStart UTCTime
                | SearchStop UTCTime
    deriving (Typeable)

$(deriveSafeCopy 0 'base ''SearchTime)
$(deriveSafeCopy 0 'base ''TweetStore0)
$(deriveSafeCopy 1 'extension ''TweetStore)

-- Migrate from version 0 to 1; for this upgrade
-- we hard code the additional search terms 
--    'aas 221' 'hackaas'
--
instance Migrate TweetStore where
  type MigrateFrom TweetStore = TweetStore0

  migrate (TweetStore0 term nt twts ts) =
    let terms = S.fromList $ term : ["aas 221", "hackaas"]
    in TweetStore terms nt twts ts

-- TODO: worry about strictness?
addTweetEvent :: T.Text -> Update TweetStore ()
addTweetEvent txt = do
  TweetStore terms cnt txts ts <- get
  put $ TweetStore terms (cnt+1) (txt : txts) ts

-- could call getCurrentTime from within addSearchStart/Stop
-- but for now send it in
addSearchStart :: UTCTime -> Update TweetStore ()
addSearchStart t = do
  TweetStore terms cnt txts ts <- get
  put $ TweetStore terms cnt txts (SearchStart t : ts)

addSearchStop :: UTCTime -> Update TweetStore ()
addSearchStop t = do
  TweetStore terms cnt txts ts <- get
  put $ TweetStore terms cnt txts (SearchStop t : ts)

getNumberEvents :: Query TweetStore Int
getNumberEvents = do
  TweetStore _ n _ _ <- ask
  return n

getTweetEvents :: Int -> Query TweetStore [T.Text]
getTweetEvents n = take n `liftM` getAllTweetEvents
  
getAllTweetEvents :: Query TweetStore [T.Text]  
getAllTweetEvents = do
  TweetStore _ _ ss _ <- ask
  return ss

getSearchTerms :: Query TweetStore (S.Set T.Text)
getSearchTerms = do
  TweetStore terms _ _ _ <- ask
  return terms

getSearchTimes :: Query TweetStore [SearchTime]
getSearchTimes = do
  TweetStore _ _ _ ts <- ask
  return ts

$(makeAcidic ''TweetStore
  [ 'addTweetEvent
  , 'addSearchStart
  , 'addSearchStop
  , 'getNumberEvents
  , 'getAllTweetEvents
  , 'getTweetEvents
  , 'getSearchTimes
  , 'getSearchTerms
  ])

-- an empty state

emptyTweetStore :: S.Set T.Text -> TweetStore
emptyTweetStore terms = 
  let sterms = S.map T.toLower terms
  in TweetStore sterms 0 [] []

data URIInfo =
	URIInfo
	[Int]    -- ^ chain of status codes in the order they were
                 --   processed (e.g. [302,200] for a redirect).
                 --   A value of 0 indicates an error occurred during
                 --   processing.
	N.URI    -- ^ resolved URI (i.e. after following any redirects);
                 --   for errors the original/unresolved URI is used
	       deriving (Eq, Ord, Show, Typeable)

-- | This is not a fully-compliant instance since the show
--   instance for URIs does not include passwords, but who
--   uses this anyway? Also, later versions of network include
--   an Ord instance for URI.
instance Ord N.URI where
  compare = comparing show

$(deriveSafeCopy 0 'base ''N.URIAuth)
$(deriveSafeCopy 0 'base ''N.URI)
$(deriveSafeCopy 0 'base ''URIInfo)

-- | Cache results from the Twitter OEmbed service
--   and URL discovery.
--
--   This is intended only to store a small number of results,
--   at least for the tweet id to JSON mapping.
--
data SimpleStatsStore = 
    SimpleStatsStore
    [(Integer, T.Text)]     -- ^ mapping from tweet id to JSON
    (M.Map String URIInfo)  -- ^ mapping from URL in tweet to a
                            --   status code and a resolved URI
    deriving (Typeable)

data OEmbedStore = 
    OEmbedStore
    [(Integer, T.Text)] -- mapping from tweet id to JSON
    deriving (Typeable)

$(deriveSafeCopy 0 'base ''OEmbedStore)
$(deriveSafeCopy 1 'extension ''SimpleStatsStore)

instance Migrate SimpleStatsStore where
  type MigrateFrom SimpleStatsStore = OEmbedStore

  migrate (OEmbedStore oes) = SimpleStatsStore oes M.empty

-- | The item will be added to the front of the store,
--   so it will effectively replace any previous version
addOEmbed :: Integer -> T.Text -> Update SimpleStatsStore ()
addOEmbed twId response = do
  SimpleStatsStore terms us <- get
  put $ SimpleStatsStore ((twId, response) : terms) us

haveOEmbed :: Integer -> Query SimpleStatsStore Bool
haveOEmbed twId = isJust `liftM` getOEmbed twId

getOEmbed :: Integer -> Query SimpleStatsStore (Maybe T.Text)
getOEmbed twId = do
  SimpleStatsStore terms _ <- ask
  return $ lookup twId terms

{-|
This replaces any existing knowledge about the URI.
-}
addURL :: String -> URIInfo -> Update SimpleStatsStore ()
addURL ustr ui = do
  SimpleStatsStore terms us <- get
  put $ SimpleStatsStore terms $ M.insertWith' const ustr ui us

haveURL :: String -> Query SimpleStatsStore Bool
haveURL u = isJust `liftM` getURL u

getURL :: String -> Query SimpleStatsStore (Maybe URIInfo)
getURL u = do
  SimpleStatsStore _ us <- ask
  return $ M.lookup u us

$(makeAcidic ''SimpleStatsStore
  [ 'addOEmbed
  , 'haveOEmbed
  , 'getOEmbed
  , 'addURL
  , 'haveURL
  , 'getURL
  ])

-- an empty state

emptySimpleStatsStore :: SimpleStatsStore
emptySimpleStatsStore = SimpleStatsStore [] M.empty

-- messing around with ports

maybeRead :: Read a => String -> Maybe a
maybeRead = fmap fst . listToMaybe . reads

getPort :: String -> Maybe PortNumber
getPort = fmap toEnum . maybeRead

openStore :: PortNumber -> IO (AcidState TweetStore)
openStore port = openRemoteState "localhost" (PortNumber port)    

-- | Force conversion of all elements within the JSON.    
toTweet' :: T.Text -> Either String StreamingAPI
toTweet' = A.eitherDecode' . TE.encodeUtf8 . LT.fromStrict

-- | Parse but do not convert
toTweet :: T.Text -> Either String StreamingAPI
toTweet = A.eitherDecode . TE.encodeUtf8 . LT.fromStrict