Source

astrosearch / Validate.hs

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}

{-
Usage:
  ./validate <port>

Aim:

Check that there are no "repeat" tweets in the database.

-}

module Main where

import qualified Data.HashMap.Strict as HM
import qualified Data.List.NonEmpty as NE
import qualified Data.Text as T
import qualified Data.Text.IO as T

import Control.Monad (unless, when, forM_, liftM)

import Data.Acid
import Data.Either (rights)
import Data.List (foldl')
import Data.List.NonEmpty (NonEmpty( (:|) ))
import Data.Semigroup ((<>))

import System.Environment (getArgs, getProgName)
import System.Exit (exitFailure)

import Web.Twitter.Types

import AcidState

type TweetData = T.Text

type TweetMap = HM.HashMap StatusId (NonEmpty TweetData)

usage :: IO ()
usage = do
  progName <- getProgName
  putStrLn $ "Usage: " ++ progName ++ " <port number>"
  exitFailure
  
main :: IO ()
main = do
  args <- getArgs
  case args of
    [portStr] -> case getPort portStr of
      Just p -> openStore p >>= validate
      _ -> usage
      
    _ -> usage

addTweet :: TweetMap -> StreamingAPI -> TweetMap
addTweet omap (SStatus Status {..}) = 
    HM.insertWith (<>) statusId (statusText :| []) omap
addTweet omap (SRetweetedStatus RetweetedStatus {..}) = 
    HM.insertWith (<>) rsId (rsText :| []) omap
addTweet omap _ = omap

validate :: AcidState TweetStore -> IO ()
validate acid = do
  nall <- query acid GetNumberEvents
  putStrLn $ ">> " ++ show nall ++ " recorded events"
  twts <- (rights . map toTweet') `liftM` query acid GetAllTweetEvents
  let nt = length twts
  when (nt /= nall) $ putStrLn $ ">>   there are " ++ show (nall-nt) ++ " non-tweet events"

  let tmap = foldl' addTweet HM.empty twts
      norig = HM.size tmap

  putStrLn $ "Found " ++ show norig ++ " separate tweets"
  unless (norig == nt) $ do

      let not1 = not . null . NE.tail
          showLine = T.putStrLn . ("  " `T.append`)

      forM_ (filter not1 $ HM.elems tmap) $ \ (x :| xs) -> do
          putStrLn $ "# Tweet has " ++ show (1 + length xs) ++ " matches"
          when (any (/= x) xs) $ do
              showLine x
              mapM_ showLine xs
              putStrLn ""