Source

astrosearch / Utils.hs

{-# LANGUAGE OverloadedStrings #-}

module Utils 
       (
         get1
       , getJust
       , maybeRead
       , replaceEntities
       , findUsers
       )
where
  
import qualified Data.Text as T

import Data.Char (isAsciiLower, isAsciiUpper, isDigit)
import Data.List (foldl')
import Data.Maybe (fromMaybe)  

-- | Returns the first element of the array; calls error if the
--   list is empty or contains multiple elements.
get1 :: (Show a) => [a] -> a
get1 [x] = x
get1 []  = error "get1: empty array"
get1 xs  = error $ "get1: multiple elements\n" ++ show xs

-- | A wrapper around 'fromJust' that provides a label on error
--   to give some indication of where your invariant doesn't
--   actually hold.
getJust :: String -> Maybe a -> a
getJust lbl = fromMaybe (error ("getJust sent Nothing: " ++ lbl))  
  
-- | A version of 'read' that can fail gracefully.  
maybeRead :: (Read a) => String -> Maybe a
maybeRead x = case [y | (y,r) <- reads x, ("","") <- lex r] of
  [z]  -> Just z
  _    -> Nothing  
  
entities :: [(T.Text, T.Text)]
entities = [ ("&quot;", "\"")
           , ("&amp;", "&")
           , ("&lt;", "<")
           , ("&gt;", ">")
           -- TODO: do we really want to replace these in all cases?
           -- the initial version probably made sense but maybe not now
           , ("\t", " ")
           , ("\n", " ")
           , (T.singleton '\8220', "\"") -- it might be nice to convert the other way round but would need to do more parsing 
           , (T.singleton '\8221', "\"")
           ]
           
-- | Replace HTML entities from the input.
replaceEntities :: T.Text -> T.Text
replaceEntities txt = 
  let conv oldVal (inVal,outVal) = T.replace inVal outVal oldVal
  in foldl' conv txt entities

{-
Given some text, find the next user name (referenced as
@xxxxx) if there is one and the text following it. If
there is no more text, or no user, return T.empty.

According to
http://kagan.mactane.org/blog/2009/09/22/what-characters-are-allowed-in-twitter-usernames/

Twitter user names match /[a-zA-Z0-9_]{1,15}/

-}

isTwitterNameChar :: Char -> Bool
isTwitterNameChar c = 
  isAsciiLower c || isAsciiUpper c || isDigit c || c == '_'

findUser :: T.Text -> (Maybe T.Text, T.Text)
findUser "" = (Nothing, T.empty)
findUser itxt = 
  let rtxt = T.dropWhile (/='@') itxt
  in if T.null rtxt
     then (Nothing, T.empty)
     else case T.compareLength itxt 1 of
       GT -> let (l, r) = T.splitAt 15 $ T.tail rtxt
                 (uname, ll) = T.span isTwitterNameChar l
             in if T.null uname
                then (Nothing, T.empty)
                else (Just uname, T.append ll r)

       _ -> (Nothing, T.empty)

findUsers :: T.Text -> [T.Text]
findUsers = 
  let go us t | T.null t = us
              | otherwise = case findUser t of
                (Just u, r) -> go (u:us) r
                _ -> us
  in go []