Source

astrosearch / Avatars.hs

Full commit
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TupleSections #-}

{-
Usage:

  ./avatars <port> [<n tweets>]

Aim:

Read the last n tweets (or all of them if no argument
is given) and download the avatar image for the senders of these
tweets, if we do not already have them.

The images are stored in avatar-dir/<username>/<name>

-}

module Main where

import qualified Data.ByteString.Char8 as B8
import qualified Data.Text as T

import Control.Applicative ((<$>))
import Control.Exception (bracket)
import Control.Monad (when, filterM, liftM)
import Control.Monad.IO.Class (liftIO)

import Data.Acid
import Data.Acid.Advanced (Method(..))
import Data.Conduit
import Data.Conduit.Binary (sinkFile)
import Data.Either (rights)
import Data.Maybe (mapMaybe)

import Network (PortNumber)
import Network.HTTP.Conduit

import System.Directory (createDirectoryIfMissing, doesFileExist)
import System.Environment (getArgs, getProgName)
import System.Exit (exitFailure, exitSuccess)
import System.FilePath ((</>), takeFileName)

import Web.Twitter.Types

import AcidState

-- Store the location of the image, the directory for the image, the user screen name and the URL for the avatar
type AvatarInfo = (FilePath, FilePath, T.Text, URIString)

usage :: IO ()
usage = do
  progName <- getProgName
  putStrLn $ "Usage: " ++ progName ++ " <port number> [<n tweets>]"
  exitFailure
  
iconDir :: FilePath
iconDir = "avatar-dir"

main :: IO ()
main = do
  args <- getArgs
  when (null args) usage
  let (portStr:args2) = args
  case getPort portStr of
    Just port -> case args2 of
                   [] -> doit port Nothing
                   [nStr] -> case maybeRead nStr of
                               Nothing -> usage
                               n -> doit port n
                   _ -> usage

    _ -> usage

doit :: PortNumber -> Maybe Int -> IO ()
doit port mn = do
  twts <- case mn of
            Just n -> do
                    putStrLn $ "Reading last " ++ show n ++ " tweets."
                    getTweets n port

            _ -> do
                 putStrLn "Reading tweets"
                 getAllTweets port

  putStrLn $ "Looking for existing avatars from " ++ show (length twts) ++ " tweets"
  let avatars = findAvatars iconDir twts
  navatars <- findNewAvatars avatars
  when (null navatars) $ putStrLn "No new avatars to be found" >> exitSuccess
  createDirectoryIfMissing False iconDir
  getAvatars navatars

doQuery ::
    (QueryEvent event, MethodState event ~ TweetStore, MethodResult event ~ [T.Text])
    => event
    -> PortNumber
    -> IO [StreamingAPI]
doQuery qry port = 
    bracket
    (openStore port)
    closeAcidState
    (\acid -> (rights . map toTweet') `liftM` query acid qry)

getAllTweets :: PortNumber -> IO [StreamingAPI]
getAllTweets = doQuery GetAllTweetEvents

getTweets :: Int -> PortNumber -> IO [StreamingAPI]
getTweets n = doQuery (GetTweetEvents n)

-- | Get the userScreenName and userProfileImageURL from all the tweets
--   for which this information is available.
findAvatars :: FilePath -> [StreamingAPI] -> [AvatarInfo]
findAvatars dirName = mapMaybe (findAvatar dirName) . findUsers

findUsers :: [StreamingAPI] -> [User]
findUsers = mapMaybe findUser

findUser :: StreamingAPI -> Maybe User
findUser (SStatus Status {..}) = Just statusUser
findUser (SRetweetedStatus RetweetedStatus {..}) = Just rsUser
findUser _ = Nothing

findAvatar :: FilePath -> User -> Maybe AvatarInfo
findAvatar dirName User {..} = (toAvatarInfo dirName . (userScreenName,)) <$> userProfileImageURL

toAvatarInfo :: FilePath -> (T.Text, URIString) -> AvatarInfo
toAvatarInfo dirName (screenName,url) = 
    let pathName = dirName </> T.unpack screenName
        name     = takeFileName $ B8.unpack url
        fileName = pathName </> name
    in (fileName, pathName, screenName, url)

-- | Remove those avatar images that have already been downloaded.
findNewAvatars :: [AvatarInfo] -> IO [AvatarInfo]
findNewAvatars = filterM needAvatar

-- | Do we need to download this avatar?
needAvatar :: AvatarInfo -> IO Bool
needAvatar (fileName,_,_,_) = not `fmap` doesFileExist fileName

{-| Download the avatar images (aka icons or profile images) of all
the tweeters and re-tweeters in the TweetStore. Files that already
exist are skipped over.

The images are added to the given directory, which is assumed to
exist.

I could fork threads here but do not feel it is worth it at this time,
and not worried about the odd tweet that may get lost due to the
store being updated whilst this script is running.

-}

getAvatars :: [AvatarInfo] -> IO ()
getAvatars avatars = do
  putStrLn $ ">> Trying to download " ++ show (length avatars) ++ " avatars"
  withManager $ \mgr -> mapM_ (downloadAvatar mgr) avatars

-- TODO: catch exceptions from failures accessing the file via HTTP
downloadAvatar :: Manager -> AvatarInfo -> ResourceT IO ()
downloadAvatar mgr (fileName,pathName,_,url) = do
  liftIO $ createDirectoryIfMissing False pathName
  liftIO $ putStrLn $ "Downloading to " ++ fileName
  liftIO $ putStrLn $ "  from " ++ B8.unpack url
  req <- parseUrl $ B8.unpack url
  Response _ _ _ src <- http req mgr
  src $$+- sinkFile fileName