Commits

Mario Blažević  committed 8ece24d

Renamed monoid-attoparsec to picoparsec, adjusted the copyrights.

  • Participants
  • Parent commits 0ebc933

Comments (0)

Files changed (32)

File Data/Attoparsec.hs

--- |
--- Module      :  Data.Attoparsec
--- Copyright   :  Bryan O'Sullivan 2007-2011
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient combinator parsing for 'ByteString' strings,
--- loosely based on the Parsec library.
-
-module Data.Attoparsec
-    (
-      module Data.Attoparsec.Monoid
-    ) where
-
-import Data.Attoparsec.Monoid

File Data/Attoparsec/ByteString/FastSet.hs

-{-# LANGUAGE BangPatterns, MagicHash #-}
-
------------------------------------------------------------------------------
--- |
--- Module      :  Data.Attoparsec.ByteString.FastSet
--- Copyright   :  Bryan O'Sullivan 2008
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The
--- set representation is unboxed for efficiency.  For small sets, we
--- test for membership using a binary search.  For larger sets, we use
--- a lookup table.
---
------------------------------------------------------------------------------
-module Data.Attoparsec.ByteString.FastSet
-    (
-    -- * Data type
-      FastSet
-    -- * Construction
-    , fromList
-    , set
-    -- * Lookup
-    , memberChar
-    , memberWord8
-    -- * Debugging
-    , fromSet
-    -- * Handy interface
-    , charClass
-    ) where
-
-import Data.Bits ((.&.), (.|.))
-import Foreign.Storable (peekByteOff, pokeByteOff)
-import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#)
-import GHC.Word (Word8(W8#))
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Char8 as B8
-import qualified Data.ByteString.Internal as I
-import qualified Data.ByteString.Unsafe as U
-
-data FastSet = Sorted { fromSet :: !B.ByteString }
-             | Table  { fromSet :: !B.ByteString }
-    deriving (Eq, Ord)
-
-instance Show FastSet where
-    show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s)
-    show (Table _) = "FastSet Table"
-
--- | The lower bound on the size of a lookup table.  We choose this to
--- balance table density against performance.
-tableCutoff :: Int
-tableCutoff = 8
-
--- | Create a set.
-set :: B.ByteString -> FastSet
-set s | B.length s < tableCutoff = Sorted . B.sort $ s
-      | otherwise                = Table . mkTable $ s
-
-fromList :: [Word8] -> FastSet
-fromList = set . B.pack
-
-data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8
-
-shiftR :: Int -> Int -> Int
-shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
-
-shiftL :: Word8 -> Int -> Word8
-shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#))
-
-index :: Int -> I
-index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7))
-{-# INLINE index #-}
-
--- | Check the set for membership.
-memberWord8 :: Word8 -> FastSet -> Bool
-memberWord8 w (Table t)  =
-    let I byte bit = index (fromIntegral w)
-    in  U.unsafeIndex t byte .&. bit /= 0
-memberWord8 w (Sorted s) = search 0 (B.length s - 1)
-    where search lo hi
-              | hi < lo = False
-              | otherwise =
-                  let mid = (lo + hi) `quot` 2
-                  in case compare w (U.unsafeIndex s mid) of
-                       GT -> search (mid + 1) hi
-                       LT -> search lo (mid - 1)
-                       _ -> True
-
--- | Check the set for membership.  Only works with 8-bit characters:
--- characters above code point 255 will give wrong answers.
-memberChar :: Char -> FastSet -> Bool
-memberChar c = memberWord8 (I.c2w c)
-{-# INLINE memberChar #-}
-
-mkTable :: B.ByteString -> B.ByteString
-mkTable s = I.unsafeCreate 32 $ \t -> do
-            _ <- I.memset t 0 32
-            U.unsafeUseAsCStringLen s $ \(p, l) ->
-              let loop n | n == l = return ()
-                         | otherwise = do
-                    c <- peekByteOff p n :: IO Word8
-                    let I byte bit = index (fromIntegral c)
-                    prev <- peekByteOff t byte :: IO Word8
-                    pokeByteOff t byte (prev .|. bit)
-                    loop (n + 1)
-              in loop 0
-
-charClass :: String -> FastSet
-charClass = set . B8.pack . go
-    where go (a:'-':b:xs) = [a..b] ++ go xs
-          go (x:xs) = x : go xs
-          go _ = ""

File Data/Attoparsec/Combinator.hs

-{-# LANGUAGE BangPatterns, CPP #-}
--- |
--- Module      :  Data.Attoparsec.Combinator
--- Copyright   :  Daan Leijen 1999-2001, Bryan O'Sullivan 2009-2010
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  portable
---
--- Useful parser combinators, similar to those provided by Parsec.
-module Data.Attoparsec.Combinator
-    (
-    -- * Combinators
-      try
-    , (<?>)
-    , choice
-    , count
-    , option
-    , many'
-    , many1
-    , many1'
-    , manyTill
-    , manyTill'
-    , sepBy
-    , sepBy'
-    , sepBy1
-    , sepBy1'
-    , skipMany
-    , skipMany1
-    , eitherP
-    -- * Parsing individual chunk elements
-    , satisfyElem
-    -- * State observation and manipulation functions
-    , endOfInput
-    , atEnd
-    ) where
-
-import Control.Applicative (Alternative(..), Applicative(..), empty, liftA2,
-                            (<|>), (*>), (<$>))
-import Control.Monad (MonadPlus(..))
-#if !MIN_VERSION_base(4,2,0)
-import Control.Applicative (many)
-#endif
-
-import Data.Attoparsec.Internal (demandInput, ensure, put, wantInput)
-import Data.Attoparsec.Internal.Types (Chunk(..), Input(..), Parser(..), addS)
-import Data.Attoparsec.Internal.Types (More(..))
-import Data.ByteString (ByteString)
-import Data.Text (Text)
-import qualified Data.Attoparsec.Zepto as Z
-
--- | Attempt a parse, and if it fails, rewind the input so that no
--- input appears to have been consumed.
---
--- This combinator is provided for compatibility with Parsec.
--- Attoparsec parsers always backtrack on failure.
-try :: Parser t a -> Parser t a
-try p = p
-{-# INLINE try #-}
-
--- | Name the parser, in case failure occurs.
-(<?>) :: Parser t a
-      -> String                 -- ^ the name to use if parsing fails
-      -> Parser t a
-p <?> msg0 = Parser $ \i0 a0 m0 kf ks ->
-             let kf' i a m strs msg = kf i a m (msg0:strs) msg
-             in runParser p i0 a0 m0 kf' ks
-{-# INLINE (<?>) #-}
-infix 0 <?>
-
--- | @choice ps@ tries to apply the actions in the list @ps@ in order,
--- until one of them succeeds. Returns the value of the succeeding
--- action.
-choice :: Alternative f => [f a] -> f a
-choice = foldr (<|>) empty
-{-# SPECIALIZE choice :: [Parser ByteString a] -> Parser ByteString a #-}
-{-# SPECIALIZE choice :: [Parser Text a] -> Parser Text a #-}
-{-# SPECIALIZE choice :: [Z.Parser a] -> Z.Parser a #-}
-
--- | @option x p@ tries to apply action @p@. If @p@ fails without
--- consuming input, it returns the value @x@, otherwise the value
--- returned by @p@.
---
--- > priority  = option 0 (digitToInt <$> digit)
-option :: Alternative f => a -> f a -> f a
-option x p = p <|> pure x
-{-# SPECIALIZE option :: a -> Parser ByteString a -> Parser ByteString a #-}
-{-# SPECIALIZE option :: a -> Parser Text a -> Parser Text a #-}
-{-# SPECIALIZE option :: a -> Z.Parser a -> Z.Parser a #-}
-
--- | A version of 'liftM2' that is strict in the result of its first
--- action.
-liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c
-liftM2' f a b = do
-  !x <- a
-  y <- b
-  return (f x y)
-{-# INLINE liftM2' #-}
-
--- | @many' p@ applies the action @p@ /zero/ or more times. Returns a
--- list of the returned values of @p@. The value returned by @p@ is
--- forced to WHNF.
---
--- >  word  = many' letter
-many' :: (MonadPlus m) => m a -> m [a]
-many' p = many_p
-  where many_p = some_p `mplus` return []
-        some_p = liftM2' (:) p many_p
-{-# INLINE many' #-}
-
--- | @many1 p@ applies the action @p@ /one/ or more times. Returns a
--- list of the returned values of @p@.
---
--- >  word  = many1 letter
-many1 :: Alternative f => f a -> f [a]
-many1 p = liftA2 (:) p (many p)
-{-# INLINE many1 #-}
-
--- | @many1' p@ applies the action @p@ /one/ or more times. Returns a
--- list of the returned values of @p@. The value returned by @p@ is
--- forced to WHNF.
---
--- >  word  = many1' letter
-many1' :: (MonadPlus m) => m a -> m [a]
-many1' p = liftM2' (:) p (many' p)
-{-# INLINE many1' #-}
-
--- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated
--- by @sep@. Returns a list of the values returned by @p@.
---
--- > commaSep p  = p `sepBy` (symbol ",")
-sepBy :: Alternative f => f a -> f s -> f [a]
-sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure []
-{-# SPECIALIZE sepBy :: Parser ByteString a -> Parser ByteString s
-                     -> Parser ByteString [a] #-}
-{-# SPECIALIZE sepBy :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
-{-# SPECIALIZE sepBy :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
-
--- | @sepBy' p sep@ applies /zero/ or more occurrences of @p@, separated
--- by @sep@. Returns a list of the values returned by @p@. The value
--- returned by @p@ is forced to WHNF.
---
--- > commaSep p  = p `sepBy'` (symbol ",")
-sepBy' :: (MonadPlus m) => m a -> m s -> m [a]
-sepBy' p s = scan `mplus` return []
-  where scan = liftM2' (:) p ((s >> sepBy1' p s) `mplus` return [])
-{-# SPECIALIZE sepBy' :: Parser ByteString a -> Parser ByteString s
-                      -> Parser ByteString [a] #-}
-{-# SPECIALIZE sepBy' :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
-{-# SPECIALIZE sepBy' :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
-
--- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated
--- by @sep@. Returns a list of the values returned by @p@.
---
--- > commaSep p  = p `sepBy1` (symbol ",")
-sepBy1 :: Alternative f => f a -> f s -> f [a]
-sepBy1 p s = scan
-    where scan = liftA2 (:) p ((s *> scan) <|> pure [])
-{-# SPECIALIZE sepBy1 :: Parser ByteString a -> Parser ByteString s
-                      -> Parser ByteString [a] #-}
-{-# SPECIALIZE sepBy1 :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
-{-# SPECIALIZE sepBy1 :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
-
--- | @sepBy1' p sep@ applies /one/ or more occurrences of @p@, separated
--- by @sep@. Returns a list of the values returned by @p@. The value
--- returned by @p@ is forced to WHNF.
---
--- > commaSep p  = p `sepBy1'` (symbol ",")
-sepBy1' :: (MonadPlus m) => m a -> m s -> m [a]
-sepBy1' p s = scan
-    where scan = liftM2' (:) p ((s >> scan) `mplus` return [])
-{-# SPECIALIZE sepBy1' :: Parser ByteString a -> Parser ByteString s
-                       -> Parser ByteString [a] #-}
-{-# SPECIALIZE sepBy1' :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
-{-# SPECIALIZE sepBy1' :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
-
--- | @manyTill p end@ applies action @p@ /zero/ or more times until
--- action @end@ succeeds, and returns the list of values returned by
--- @p@.  This can be used to scan comments:
---
--- >  simpleComment   = string "<!--" *> manyTill anyChar (string "-->")
---
--- (Note the overlapping parsers @anyChar@ and @string \"-->\"@.
--- While this will work, it is not very efficient, as it will cause a
--- lot of backtracking.)
-manyTill :: Alternative f => f a -> f b -> f [a]
-manyTill p end = scan
-    where scan = (end *> pure []) <|> liftA2 (:) p scan
-{-# SPECIALIZE manyTill :: Parser ByteString a -> Parser ByteString b
-                        -> Parser ByteString [a] #-}
-{-# SPECIALIZE manyTill :: Parser Text a -> Parser Text b -> Parser Text [a] #-}
-{-# SPECIALIZE manyTill :: Z.Parser a -> Z.Parser b -> Z.Parser [a] #-}
-
--- | @manyTill' p end@ applies action @p@ /zero/ or more times until
--- action @end@ succeeds, and returns the list of values returned by
--- @p@.  This can be used to scan comments:
---
--- >  simpleComment   = string "<!--" *> manyTill' anyChar (string "-->")
---
--- (Note the overlapping parsers @anyChar@ and @string \"-->\"@.
--- While this will work, it is not very efficient, as it will cause a
--- lot of backtracking.)
---
--- The value returned by @p@ is forced to WHNF.
-manyTill' :: (MonadPlus m) => m a -> m b -> m [a]
-manyTill' p end = scan
-    where scan = (end >> return []) `mplus` liftM2' (:) p scan
-{-# SPECIALIZE manyTill' :: Parser ByteString a -> Parser ByteString b
-                         -> Parser ByteString [a] #-}
-{-# SPECIALIZE manyTill' :: Parser Text a -> Parser Text b -> Parser Text [a] #-}
-{-# SPECIALIZE manyTill' :: Z.Parser a -> Z.Parser b -> Z.Parser [a] #-}
-
--- | Skip zero or more instances of an action.
-skipMany :: Alternative f => f a -> f ()
-skipMany p = scan
-    where scan = (p *> scan) <|> pure ()
-{-# SPECIALIZE skipMany :: Parser ByteString a -> Parser ByteString () #-}
-{-# SPECIALIZE skipMany :: Parser Text a -> Parser Text () #-}
-{-# SPECIALIZE skipMany :: Z.Parser a -> Z.Parser () #-}
-
--- | Skip one or more instances of an action.
-skipMany1 :: Alternative f => f a -> f ()
-skipMany1 p = p *> skipMany p
-{-# SPECIALIZE skipMany1 :: Parser ByteString a -> Parser ByteString () #-}
-{-# SPECIALIZE skipMany1 :: Parser Text a -> Parser Text () #-}
-{-# SPECIALIZE skipMany1 :: Z.Parser a -> Z.Parser () #-}
-
--- | Apply the given action repeatedly, returning every result.
-count :: Monad m => Int -> m a -> m [a]
-count n p = sequence (replicate n p)
-{-# INLINE count #-}
-
--- | Combine two alternatives.
-eitherP :: (Alternative f) => f a -> f b -> f (Either a b)
-eitherP a b = (Left <$> a) <|> (Right <$> b)
-{-# INLINE eitherP #-}
-
--- | The parser @satisfyElem p@ succeeds for any chunk element for which the
--- predicate @p@ returns 'True'. Returns the element that is
--- actually parsed.
---
--- >digit = satisfyElem isDigit
--- >    where isDigit c = c >= '0' && c <= '9'
-satisfyElem :: Chunk t => (ChunkElem t -> Bool) -> Parser t (ChunkElem t)
-satisfyElem p = do
-  c <- ensure 1
-  let !h = unsafeChunkHead c
-  if p h
-    then put (unsafeChunkTail c) >> return h
-    else fail "satisfyElem"
-{-# INLINE satisfyElem #-}
-
--- | Match only if all input has been consumed.
-endOfInput :: Chunk t => Parser t ()
-endOfInput = Parser $ \i0 a0 m0 kf ks ->
-             if nullChunk (unI i0)
-             then if m0 == Complete
-                  then ks i0 a0 m0 ()
-                  else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
-                                              \ i2 a2 m2 -> ks i2 a2 m2 ()
-                           ks' i1 a1 m1 _   = addS i0 a0 m0 i1 a1 m1 $
-                                              \ i2 a2 m2 -> kf i2 a2 m2 []
-                                                            "endOfInput"
-                       in  runParser demandInput i0 a0 m0 kf' ks'
-             else kf i0 a0 m0 [] "endOfInput"
-{-# SPECIALIZE endOfInput :: Parser ByteString () #-}
-{-# SPECIALIZE endOfInput :: Parser Text () #-}
-
--- | Return an indication of whether the end of input has been
--- reached.
-atEnd :: Chunk t => Parser t Bool
-atEnd = not <$> wantInput
-{-# INLINE atEnd #-}

File Data/Attoparsec/Internal.hs

-{-# LANGUAGE BangPatterns #-}
--- |
--- Module      :  Data.Attoparsec.Internal
--- Copyright   :  Bryan O'Sullivan 2012
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators, loosely based on the Parsec
--- library.
-
-module Data.Attoparsec.Internal
-    (
-      compareResults
-    , get
-    , put
-    , ensure
-    , prompt
-    , demandInput
-    , wantInput
-    ) where
-
-import Data.Attoparsec.Internal.Types
-import Data.ByteString (ByteString)
-import Data.Text (Text)
-
--- | Compare two 'IResult' values for equality.
---
--- If both 'IResult's are 'Partial', the result will be 'Nothing', as
--- they are incomplete and hence their equality cannot be known.
--- (This is why there is no 'Eq' instance for 'IResult'.)
-compareResults :: (Eq t, Eq r) => IResult t r -> IResult t r -> Maybe Bool
-compareResults (Fail i0 ctxs0 msg0) (Fail i1 ctxs1 msg1) =
-    Just (i0 == i1 && ctxs0 == ctxs1 && msg0 == msg1)
-compareResults (Done i0 r0) (Done i1 r1) =
-    Just (i0 == i1 && r0 == r1)
-compareResults (Partial _) (Partial _) = Nothing
-compareResults _ _ = Just False
-
-get :: Parser t t
-get = Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)
-{-# INLINE get #-}
-
-put :: t -> Parser t ()
-put c = Parser $ \_i0 a0 m0 _kf ks -> ks (I c) a0 m0 ()
-{-# INLINE put #-}
-
-ensure' :: Chunk t
-        => Int -> Input t -> Added t -> More -> Failure t r -> Success t t r
-        -> IResult t r
-ensure' !n0 i0 a0 m0 kf0 ks0 =
-    runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0
-  where
-    go !n = Parser $ \i a m kf ks ->
-        if chunkLengthAtLeast (unI i) n
-        then ks i a m (unI i)
-        else runParser (demandInput >> go n) i a m kf ks
-{-# SPECIALIZE ensure' :: Int -> Input ByteString -> Added ByteString -> More
-                       -> Failure ByteString r
-                       -> Success ByteString ByteString r
-                       -> IResult ByteString r #-}
-{-# SPECIALIZE ensure' :: Int -> Input Text -> Added Text -> More
-                       -> Failure Text r -> Success Text Text r
-                       -> IResult Text r #-}
-
--- | If at least @n@ elements of input are available, return the
--- current input, otherwise fail.
-ensure :: Chunk t => Int -> Parser t t
-ensure !n = Parser $ \i0 a0 m0 kf ks ->
-    if chunkLengthAtLeast (unI i0) n
-    then ks i0 a0 m0 (unI i0)
-    -- The uncommon case is kept out-of-line to reduce code size:
-    else ensure' n i0 a0 m0 kf ks
--- Non-recursive so the bounds check can be inlined:
-{-# INLINE ensure #-}
-
--- | Ask for input.  If we receive any, pass it to a success
--- continuation, otherwise to a failure continuation.
-prompt :: Chunk t
-       => Input t -> Added t -> More
-       -> (Input t -> Added t -> More -> IResult t r)
-       -> (Input t -> Added t -> More -> IResult t r)
-       -> IResult t r
-prompt i0 a0 _m0 kf ks = Partial $ \s ->
-    if nullChunk s
-    then kf i0 a0 Complete
-    else ks (i0 <> I s) (a0 <> A s) Incomplete
-{-# SPECIALIZE prompt :: Input ByteString -> Added ByteString -> More
-                      -> (Input ByteString -> Added ByteString -> More
-                          -> IResult ByteString r)
-                      -> (Input ByteString -> Added ByteString -> More
-                          -> IResult ByteString r)
-                      -> IResult ByteString r #-}
-{-# SPECIALIZE prompt :: Input Text -> Added Text -> More
-                      -> (Input Text -> Added Text -> More -> IResult Text r)
-                      -> (Input Text -> Added Text-> More -> IResult Text r)
-                      -> IResult Text r #-}
-
--- | Immediately demand more input via a 'Partial' continuation
--- result.
-demandInput :: Chunk t => Parser t ()
-demandInput = Parser $ \i0 a0 m0 kf ks ->
-    if m0 == Complete
-    then kf i0 a0 m0 ["demandInput"] "not enough input"
-    else let kf' i a m = kf i a m ["demandInput"] "not enough input"
-             ks' i a m = ks i a m ()
-         in prompt i0 a0 m0 kf' ks'
-{-# SPECIALIZE demandInput :: Parser ByteString () #-}
-{-# SPECIALIZE demandInput :: Parser Text () #-}
-
--- | This parser always succeeds.  It returns 'True' if any input is
--- available either immediately or on demand, and 'False' if the end
--- of all input has been reached.
-wantInput :: Chunk t => Parser t Bool
-wantInput = Parser $ \i0 a0 m0 _kf ks ->
-  case () of
-    _ | not (nullChunk (unI i0)) -> ks i0 a0 m0 True
-      | m0 == Complete  -> ks i0 a0 m0 False
-      | otherwise       -> let kf' i a m = ks i a m False
-                               ks' i a m = ks i a m True
-                           in prompt i0 a0 m0 kf' ks'
-{-# SPECIALIZE wantInput :: Parser ByteString Bool #-}
-{-# SPECIALIZE wantInput :: Parser Text Bool #-}

File Data/Attoparsec/Internal/Types.hs

-{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, OverloadedStrings,
-    Rank2Types, RecordWildCards, TypeFamilies #-}
--- |
--- Module      :  Data.Attoparsec.Internal.Types
--- Copyright   :  Bryan O'Sullivan 2007-2011
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators, loosely based on the Parsec
--- library.
-
-module Data.Attoparsec.Internal.Types
-    (
-      Parser(..)
-    , Failure
-    , Success
-    , IResult(..)
-    , Input(..)
-    , Added(..)
-    , More(..)
-    , addS
-    , (<>)
-    , Chunk(..)
-    ) where
-
-import Control.Applicative (Alternative(..), Applicative(..), (<$>))
-import Control.DeepSeq (NFData(rnf))
-import Control.Monad (MonadPlus(..))
-import Data.ByteString (ByteString)
-import Data.ByteString.Internal (w2c)
-import Data.Monoid (Monoid(..))
-import Data.Text (Text)
-import Data.Word (Word8)
-import Prelude hiding (getChar, take, takeWhile)
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.Unsafe as BS
-import qualified Data.Text as T
-import qualified Data.Text.Unsafe as T
-
--- | The result of a parse.  This is parameterised over the type @t@
--- of string that was processed.
---
--- This type is an instance of 'Functor', where 'fmap' transforms the
--- value in a 'Done' result.
-data IResult t r = Fail t [String] String
-                 -- ^ The parse failed.  The 't' parameter is the
-                 -- input that had not yet been consumed when the
-                 -- failure occurred.  The @[@'String'@]@ is a list of
-                 -- contexts in which the error occurred.  The
-                 -- 'String' is the message describing the error, if
-                 -- any.
-                 | Partial (t -> IResult t r)
-                 -- ^ Supply this continuation with more input so that
-                 -- the parser can resume.  To indicate that no more
-                 -- input is available, use an empty string.
-                 | Done t r
-                 -- ^ The parse succeeded.  The 't' parameter is the
-                 -- input that had not yet been consumed (if any) when
-                 -- the parse succeeded.
-
-instance (Show t, Show r) => Show (IResult t r) where
-    show (Fail t stk msg) =
-        "Fail " ++ show t ++ " " ++ show stk ++ " " ++ show msg
-    show (Partial _)      = "Partial _"
-    show (Done t r)       = "Done " ++ show t ++ " " ++ show r
-
-instance (NFData t, NFData r) => NFData (IResult t r) where
-    rnf (Fail t stk msg) = rnf t `seq` rnf stk `seq` rnf msg
-    rnf (Partial _)  = ()
-    rnf (Done t r)   = rnf t `seq` rnf r
-    {-# INLINE rnf #-}
-
-fmapR :: (a -> b) -> IResult t a -> IResult t b
-fmapR _ (Fail t stk msg) = Fail t stk msg
-fmapR f (Partial k)       = Partial (fmapR f . k)
-fmapR f (Done t r)       = Done t (f r)
-
-instance Functor (IResult t) where
-    fmap = fmapR
-    {-# INLINE fmap #-}
-
-newtype Input t = I {unI :: t} deriving (Monoid)
-newtype Added t = A {unA :: t} deriving (Monoid)
-
--- | The core parser type.  This is parameterised over the type @t@ of
--- string being processed.
---
--- This type is an instance of the following classes:
---
--- * 'Monad', where 'fail' throws an exception (i.e. fails) with an
---   error message.
---
--- * 'Functor' and 'Applicative', which follow the usual definitions.
---
--- * 'MonadPlus', where 'mzero' fails (with no error message) and
---   'mplus' executes the right-hand parser if the left-hand one
---   fails.  When the parser on the right executes, the input is reset
---   to the same state as the parser on the left started with. (In
---   other words, Attoparsec is a backtracking parser that supports
---   arbitrary lookahead.)
---
--- * 'Alternative', which follows 'MonadPlus'.
-newtype Parser t a = Parser {
-      runParser :: forall r. Input t -> Added t -> More
-                -> Failure t   r
-                -> Success t a r
-                -> IResult t r
-    }
-
-type Failure t   r = Input t -> Added t -> More -> [String] -> String
-                   -> IResult t r
-type Success t a r = Input t -> Added t -> More -> a -> IResult t r
-
--- | Have we read all available input?
-data More = Complete | Incomplete
-            deriving (Eq, Show)
-
-instance Monoid More where
-    mappend c@Complete _ = c
-    mappend _ m          = m
-    mempty               = Incomplete
-
-addS :: (Monoid t) =>
-        Input t -> Added t -> More
-     -> Input t -> Added t -> More
-     -> (Input t -> Added t -> More -> r) -> r
-addS i0 a0 m0 _i1 a1 m1 f =
-    let !i = i0 <> I (unA a1)
-        a  = a0 <> a1
-        !m = m0 <> m1
-    in f i a m
-{-# INLINE addS #-}
-
-bindP :: Parser t a -> (a -> Parser t b) -> Parser t b
-bindP m g =
-    Parser $ \i0 a0 m0 kf ks -> runParser m i0 a0 m0 kf $
-                                \i1 a1 m1 a -> runParser (g a) i1 a1 m1 kf ks
-{-# INLINE bindP #-}
-
-returnP :: a -> Parser t a
-returnP a = Parser (\i0 a0 m0 _kf ks -> ks i0 a0 m0 a)
-{-# INLINE returnP #-}
-
-instance Monad (Parser t) where
-    return = returnP
-    (>>=)  = bindP
-    fail   = failDesc
-
-noAdds :: (Monoid t) =>
-          Input t -> Added t -> More
-       -> (Input t -> Added t -> More -> r) -> r
-noAdds i0 _a0 m0 f = f i0 mempty m0
-{-# INLINE noAdds #-}
-
-plus :: (Monoid t) => Parser t a -> Parser t a -> Parser t a
-plus a b = Parser $ \i0 a0 m0 kf ks ->
-           let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
-                                  \ i2 a2 m2 -> runParser b i2 a2 m2 kf ks
-               ks' i1 a1 m1 = ks i1 (a0 <> a1) m1
-           in  noAdds i0 a0 m0 $ \i2 a2 m2 -> runParser a i2 a2 m2 kf' ks'
-
-instance (Monoid t) => MonadPlus (Parser t) where
-    mzero = failDesc "mzero"
-    {-# INLINE mzero #-}
-    mplus = plus
-
-fmapP :: (a -> b) -> Parser t a -> Parser t b
-fmapP p m = Parser $ \i0 a0 m0 f k ->
-            runParser m i0 a0 m0 f $ \i1 a1 s1 a -> k i1 a1 s1 (p a)
-{-# INLINE fmapP #-}
-
-instance Functor (Parser t) where
-    fmap = fmapP
-    {-# INLINE fmap #-}
-
-apP :: Parser t (a -> b) -> Parser t a -> Parser t b
-apP d e = do
-  b <- d
-  a <- e
-  return (b a)
-{-# INLINE apP #-}
-
-instance Applicative (Parser t) where
-    pure   = returnP
-    {-# INLINE pure #-}
-    (<*>)  = apP
-    {-# INLINE (<*>) #-}
-
-#if MIN_VERSION_base(4,2,0)
-    -- These definitions are equal to the defaults, but this
-    -- way the optimizer doesn't have to work so hard to figure
-    -- that out.
-    (*>)   = (>>)
-    {-# INLINE (*>) #-}
-    x <* y = x >>= \a -> y >> return a
-    {-# INLINE (<*) #-}
-#endif
-
-instance (Monoid t) => Monoid (Parser t a) where
-    mempty  = failDesc "mempty"
-    {-# INLINE mempty #-}
-    mappend = plus
-    {-# INLINE mappend #-}
-
-instance (Monoid t) => Alternative (Parser t) where
-    empty = failDesc "empty"
-    {-# INLINE empty #-}
-
-    (<|>) = plus
-    {-# INLINE (<|>) #-}
-
-#if MIN_VERSION_base(4,2,0)
-    many v = many_v
-        where many_v = some_v <|> pure []
-              some_v = (:) <$> v <*> many_v
-    {-# INLINE many #-}
-
-    some v = some_v
-      where
-        many_v = some_v <|> pure []
-        some_v = (:) <$> v <*> many_v
-    {-# INLINE some #-}
-#endif
-
-failDesc :: String -> Parser t a
-failDesc err = Parser (\i0 a0 m0 kf _ks -> kf i0 a0 m0 [] msg)
-    where msg = "Failed reading: " ++ err
-{-# INLINE failDesc #-}
-
-(<>) :: (Monoid m) => m -> m -> m
-(<>) = mappend
-{-# INLINE (<>) #-}
-
--- | A common interface for input chunks.
-class Monoid c => Chunk c where
-  type ChunkElem c
-  -- | Test if the chunk is empty.
-  nullChunk :: c -> Bool
-  -- | Get the head element of a non-empty chunk.
-  unsafeChunkHead :: c -> ChunkElem c
-  -- | Get the tail of a non-empty chunk.
-  unsafeChunkTail :: c -> c
-  -- | Check if the chunk has the length of at least @n@ elements.
-  chunkLengthAtLeast :: c -> Int -> Bool
-  -- | Map an element to the corresponding character.
-  --   The first argument is ignored.
-  chunkElemToChar :: c -> ChunkElem c -> Char
-
-instance Chunk ByteString where
-  type ChunkElem ByteString = Word8
-  nullChunk = BS.null
-  {-# INLINE nullChunk #-}
-  unsafeChunkHead = BS.unsafeHead
-  {-# INLINE unsafeChunkHead #-}
-  unsafeChunkTail = BS.unsafeTail
-  {-# INLINE unsafeChunkTail #-}
-  chunkLengthAtLeast bs n = BS.length bs >= n
-  {-# INLINE chunkLengthAtLeast #-}
-  chunkElemToChar = const w2c
-  {-# INLINE chunkElemToChar #-}
-
-instance Chunk Text where
-  type ChunkElem Text = Char
-  nullChunk = T.null
-  {-# INLINE nullChunk #-}
-  unsafeChunkHead = T.unsafeHead
-  {-# INLINE unsafeChunkHead #-}
-  unsafeChunkTail = T.unsafeTail
-  {-# INLINE unsafeChunkTail #-}
-  chunkLengthAtLeast t n = T.lengthWord16 t `quot` 2 >= n || T.length t >= n
-  {-# INLINE chunkLengthAtLeast #-}
-  chunkElemToChar = const id
-  {-# INLINE chunkElemToChar #-}

File Data/Attoparsec/Monoid.hs

--- |
--- Module      :  Data.Attoparsec.Monoid
--- Copyright   :  Bryan O'Sullivan 2007-2011
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient combinator parsing for
--- 'Data.Monoid.Cancellative.LeftReductiveMonoid' and
--- 'Data.Monoid.Factorial.FactorialMonoid' inputs, loosely based on
--- the Parsec library.
-
-module Data.Attoparsec.Monoid
-    (
-    -- * Differences from Parsec
-    -- $parsec
-
-    -- * Incremental input
-    -- $incremental
-
-    -- * Performance considerations
-    -- $performance
-
-    -- * Parser types
-      I.Parser
-    , Result
-    , T.IResult(..)
-    , I.compareResults
-
-    -- * Running parsers
-    , parse
-    , feed
-    , I.parseOnly
-    , parseWith
-    , parseTest
-
-    -- ** Result conversion
-    , maybeResult
-    , eitherResult
-
-    -- * Combinators
-    , module Data.Attoparsec.Combinator
-
-    -- * Parsing individual tokens
-    , I.anyToken
-    , I.peekToken
-    , I.satisfy
-    , I.satisfyWith
-    , I.skip
-
-    -- ** Parsing individual characters
-    , I.anyChar
-    , I.char
-    , I.satisfyChar
-
-    -- * Efficient string handling
-    , I.string
-    , I.skipWhile
-    , I.take
-    , I.takeWhile
-    , I.takeWhile1
-    , I.takeTill
-
-    -- ** Efficient character string handling
-    , I.takeCharsWhile
-    , I.takeCharsWhile1
-    , I.takeCharsTill
-    , I.takeTillChar
-    , I.takeTillChar1
-
-    -- ** Consume all remaining input
-    , I.takeRest
-
-    -- * Text parsing
-    , I.endOfLine
-    ) where
-
-import Data.Monoid (Monoid, (<>))
-
-import Data.Attoparsec.Combinator
-import qualified Data.Attoparsec.Monoid.Internal as I
-import qualified Data.Attoparsec.Internal as I
-import Data.Attoparsec.Monoid.Internal (Result, parse)
-import qualified Data.Attoparsec.Internal.Types as T
-
--- $parsec
---
--- Compared to Parsec 3, Attoparsec makes several tradeoffs.  It is
--- not intended for, or ideal for, all possible uses.
---
--- * While Attoparsec can consume input incrementally, Parsec cannot.
---   Incremental input is a huge deal for efficient and secure network
---   and system programming, since it gives much more control to users
---   of the library over matters such as resource usage and the I/O
---   model to use.
---
--- * Much of the performance advantage of Attoparsec is gained via
---   high-performance parsers such as 'I.takeWhile' and 'I.string'.
---   If you use complicated combinators that return lists of bytes or
---   characters, there is less performance difference between the two
---   libraries.
---
--- * Unlike Parsec 3, Attoparsec does not support being used as a
---   monad transformer.
---
--- * Attoparsec is specialised to deal only with input in 'Monoid'
---   class; more specifically, input that belongs to 'FactorialMonoid' 
---   and 'CancellativeMonoid' classes. Character-oriented parsers also 
---   require 'TextualMonoid' instances for their inputs.
---
--- * Parsec parsers can produce more helpful error messages than
---   Attoparsec parsers.  This is a matter of focus: Attoparsec avoids
---   the extra book-keeping in favour of higher performance.
-
--- $incremental
---
--- Attoparsec supports incremental input, meaning that you can feed it
--- a chunk of input that represents only part of the expected total
--- amount of data to parse. If your parser reaches the end of a
--- fragment of input and could consume more input, it will suspend
--- parsing and return a 'T.Partial' continuation.
---
--- Supplying the 'T.Partial' continuation with another string will
--- resume parsing at the point where it was suspended. You must be
--- prepared for the result of the resumed parse to be another
--- 'T.Partial' continuation.
---
--- To indicate that you have no more input, supply the 'T.Partial'
--- continuation with an empty string.
---
--- Remember that some parsing combinators will not return a result
--- until they reach the end of input.  They may thus cause 'T.Partial'
--- results to be returned.
---
--- If you do not need support for incremental input, consider using
--- the 'I.parseOnly' function to run your parser.  It will never
--- prompt for more input.
-
--- $performance
---
--- If you write an Attoparsec-based parser carefully, it can be
--- realistic to expect it to perform within a factor of 2 of a
--- hand-rolled C parser (measuring megabytes parsed per second).
---
--- To actually achieve high performance, there are a few guidelines
--- that it is useful to follow.
---
--- Use the input-returning parsers whenever possible,
--- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyToken'.  There is
--- about a large difference in performance between the two kinds of
--- parsers.
---
--- Make active use of benchmarking and profiling tools to measure,
--- find the problems with, and improve the performance of your parser.
-
--- | If a parser has returned a 'T.Partial' result, supply it with more
--- input.
-feed :: Monoid t => Result t r -> t -> Result t r
-feed f@(T.Fail _ _ _) _ = f
-feed (T.Partial k) d    = k d
-feed (T.Done t r) d    = T.Done (t <> d) r
-{-# INLINE feed #-}
-
--- | Run a parser and print its result to standard output.
-parseTest :: (Monoid t, Show t, Show a) => I.Parser t a -> t -> IO ()
-parseTest p s = print (parse p s)
-
--- | Run a parser with an initial input string, and a monadic action
--- that can supply more input if needed.
-parseWith :: (Monoid t, Monad m) => m t
-          -- ^ An action that will be executed to provide the parser
-          -- with more input, if necessary.  The action must return an
-          -- 'mempty' string when there is no more input available.
-          -> I.Parser t a
-          -> t
-          -- ^ Initial input for the parser.
-          -> m (Result t a)
-parseWith refill p s = step $ parse p s
-  where step (T.Partial k) = (step . k) =<< refill
-        step r             = return r
-{-# INLINE parseWith #-}
-
--- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result
--- is treated as failure.
-maybeResult :: Result t r -> Maybe r
-maybeResult (T.Done _ r) = Just r
-maybeResult _            = Nothing
-
--- | Convert a 'Result' value to an 'Either' value. A 'T.Partial'
--- result is treated as failure.
-eitherResult :: Result t r -> Either String r
-eitherResult (T.Done _ r)     = Right r
-eitherResult (T.Fail _ _ msg) = Left msg
-eitherResult _                = Left "Result: incomplete input"

File Data/Attoparsec/Monoid/Internal.hs

-{-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings #-}
--- |
--- Module      :  Data.Attoparsec.Monoid.Internal
--- Copyright   :  Bryan O'Sullivan 2007-2011
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient combinator parsing for
--- 'Data.Monoid.Cancellative.LeftGCDMonoid' and
--- 'Data.Monoid.Factorial.FactorialMonoid' inputs, loosely based on
--- the Parsec library.
-
-module Data.Attoparsec.Monoid.Internal
-    (
-    -- * Parser types
-      Parser
-    , Result
-
-    -- * Running parsers
-    , parse
-    , parseOnly
-
-    -- * Combinators
-    , module Data.Attoparsec.Combinator
-
-    -- * Parsing individual tokens
-    , satisfy
-    , satisfyWith
-    , anyToken
-    , skip
-    , peekToken
-    
-    -- ** Parsing individual characters
-    , anyChar
-    , char
-    , satisfyChar
-
-    -- * Efficient string handling
-    , skipWhile
-    , string
-    , stringTransform
-    , take
-    , takeWhile
-    , takeWhile1
-    , takeTill
-
-    -- ** Efficient character string handling
-    , takeCharsWhile
-    , takeCharsWhile1
-    , takeCharsTill
-    , takeTillChar
-    , takeTillChar1
-
-    -- ** Consume all remaining input
-    , takeRest
-
-    -- * Utilities
-    , endOfLine
-    ) where
-
-import Control.Applicative ((<|>), (<$>))
-import Control.Monad (when)
-import Data.Attoparsec.Combinator
-import Data.Attoparsec.Internal.Types
-import Data.Monoid (Monoid(..))
-import Data.Monoid.Cancellative (LeftGCDMonoid(..))
-import Data.Monoid.Null (MonoidNull(null))
-import qualified Data.Monoid.Factorial as Factorial
-import Data.Monoid.Factorial (FactorialMonoid)
-import Data.Monoid.Textual (TextualMonoid)
-import qualified Data.Monoid.Textual as Textual
-import Prelude hiding (getChar, null, span, take, takeWhile)
-import qualified Data.Attoparsec.Internal.Types as T
-
-type Result = IResult
-
-ensure' :: FactorialMonoid t => Int -> T.Input t -> T.Added t -> More -> T.Failure t r -> T.Success t t r
-        -> IResult t r
-ensure' !n0 i0 a0 m0 kf0 ks0 =
-    T.runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0
-  where
-    go !n = T.Parser $ \i a m kf ks ->
-        if Factorial.length (unI i) >= n
-        then ks i a m (unI i)
-        else T.runParser (demandInput >> go n) i a m kf ks
-
--- | If at least @n@ prime tokens of input are available, return the
--- current input, otherwise fail.
-ensure :: FactorialMonoid t => Int -> Parser t t
-ensure !n = T.Parser $ \i0 a0 m0 kf ks ->
-    if Factorial.length (unI i0) >= n
-    then ks i0 a0 m0 (unI i0)
-    -- The uncommon case is kept out-of-line to reduce code size:
-    else ensure' n i0 a0 m0 kf ks
--- Non-recursive so the bounds check can be inlined:
-{-# INLINE ensure #-}
-
--- | If at least one token of input is available, return the current
--- input, otherwise fail.
-ensureOne :: FactorialMonoid t => Parser t t
-ensureOne = T.Parser $ \i0 a0 m0 kf ks ->
-    if null (unI i0)
-    -- The uncommon case is kept out-of-line to reduce code size:
-    then ensure' 1 i0 a0 m0 kf ks
-    else ks i0 a0 m0 (unI i0)
--- Non-recursive so the bounds check can be inlined:
-{-# INLINE ensureOne #-}
-
--- | Ask for input.  If we receive any, pass it to a success
--- continuation, otherwise to a failure continuation.
-prompt :: MonoidNull t => Input t -> Added t -> More
-       -> (Input t -> Added t -> More -> IResult t r)
-       -> (Input t -> Added t -> More -> IResult t r)
-       -> IResult t r
-prompt i0 a0 _m0 kf ks = Partial $ \s ->
-    if null s
-    then kf i0 a0 Complete
-    else ks (i0 <> I s) (a0 <> A s) Incomplete
-
--- | Immediately demand more input via a 'Partial' continuation
--- result.
-demandInput :: MonoidNull t => Parser t ()
-demandInput = T.Parser $ \i0 a0 m0 kf ks ->
-    if m0 == Complete
-    then kf i0 a0 m0 ["demandInput"] "not enough input"
-    else let kf' i a m = kf i a m ["demandInput"] "not enough input"
-             ks' i a m = ks i a m ()
-         in prompt i0 a0 m0 kf' ks'
-
--- | This parser always succeeds.  It returns 'True' if any input is
--- available either immediately or on demand, and 'False' if the end
--- of all input has been reached.
-wantInput :: MonoidNull t => Parser t Bool
-wantInput = T.Parser $ \i0 a0 m0 _kf ks ->
-  case () of
-    _ | not (null (unI i0)) -> ks i0 a0 m0 True
-      | m0 == Complete  -> ks i0 a0 m0 False
-      | otherwise       -> let kf' i a m = ks i a m False
-                               ks' i a m = ks i a m True
-                           in prompt i0 a0 m0 kf' ks'
-
--- | This parser always succeeds.  It returns 'True' if any input is
--- available on demand, and 'False' if the end of all input has been reached.
-wantMoreInput :: MonoidNull t => Parser t Bool
-wantMoreInput = T.Parser $ \i0 a0 m0 _kf ks ->
-  if m0 == Complete  
-  then ks i0 a0 m0 False
-  else let kf' i a m = ks i a m False
-           ks' i a m = ks i a m True
-       in prompt i0 a0 m0 kf' ks'
-
-get :: Parser t t
-get  = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)
-
-put :: t -> Parser t ()
-put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()
-
--- | The parser @satisfy p@ succeeds for any prime input token for
--- which the predicate @p@ returns 'True'. Returns the token that is
--- actually parsed.
---
--- >digit = satisfy isDigit
--- >    where isDigit w = w >= "0" && w <= "9"
-satisfy :: FactorialMonoid t => (t -> Bool) -> Parser t t
-satisfy p = do
-  s <- ensureOne
-  let Just (first, rest) = Factorial.splitPrimePrefix s
-  if p first then put rest >> return first else fail "satisfy"
-{-# INLINE satisfy #-}
-
--- | The parser @satisfy p@ succeeds for any input character for
--- which the predicate @p@ returns 'True'. Returns the character that 
--- is actually parsed.
---
--- >digit = satisfy isDigit
--- >    where isDigit w = w >= "0" && w <= "9"
-satisfyChar :: TextualMonoid t => (Char -> Bool) -> Parser t Char
-satisfyChar p = do
-  s <- ensureOne
-  case Textual.splitCharacterPrefix s 
-     of Just (first, rest) | p first -> put rest >> return first 
-        _ -> fail "satisfy"
-{-# INLINE satisfyChar #-}
-
--- | The parser @skip p@ succeeds for any prime input token for which
--- the predicate @p@ returns 'True'.
---
--- >skipDigit = skip isDigit
--- >    where isDigit w = w >= "0" && w <= "9"
-skip :: FactorialMonoid t => (t -> Bool) -> Parser t ()
-skip p = do
-  s <- ensureOne
-  let Just (first, rest) = Factorial.splitPrimePrefix s
-  if p first then put rest else fail "skip"
-
--- | The parser @satisfyWith f p@ transforms an input token, and
--- succeeds if the predicate @p@ returns 'True' on the transformed
--- value. The parser returns the transformed token that was parsed.
-satisfyWith :: FactorialMonoid t => (t -> a) -> (a -> Bool) -> Parser t a
-satisfyWith f p = do
-  s <- ensureOne
-  let Just (first, rest) = Factorial.splitPrimePrefix s
-      c = f $! first
-  if p c then put rest >> return c else fail "satisfyWith"
-{-# INLINE satisfyWith #-}
-
--- | Consume @n@ tokens of input, but succeed only if the predicate
--- returns 'True'.
-takeWith :: FactorialMonoid t => Int -> (t -> Bool) -> Parser t t
-takeWith n0 p = do
-  let n = max n0 0
-  s <- ensure n
-  let (h, t) = Factorial.splitAt n s
-  if p h
-    then put t >> return h
-    else fail "takeWith"
-
--- | Consume exactly @n@ prime input tokens.
-take :: FactorialMonoid t => Int -> Parser t t
-take n = takeWith n (const True)
-{-# INLINE take #-}
-
--- | @string s@ parses a prefix of input that identically matches
--- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
--- input if it fails (even if a partial match).
---
--- /Note/: The behaviour of this parser is different to that of the
--- similarly-named parser in Parsec, as this one is all-or-nothing.
--- To illustrate the difference, the following parser will fail under
--- Parsec given an input of @\"for\"@:
---
--- >string "foo" <|> string "for"
---
--- The reason for its failure is that the first branch is a
--- partial match, and will consume the letters @\'f\'@ and @\'o\'@
--- before failing.  In Attoparsec, the above parser will /succeed/ on
--- that input, because the failed first branch will consume nothing.
-string :: (LeftGCDMonoid t, MonoidNull t) => t -> Parser t t
-string s =
-   get >>= \i->
-   let !(p, s', i') = stripCommonPrefix s i
-   in if null s'
-      then put i' >> return s
-      else if null i'
-           then put mempty
-                >> demandInput
-                >> string' p s'
-           else fail "string"
-{-# INLINE string #-}
-
--- The uncommon case
-string' :: (LeftGCDMonoid t, MonoidNull t) => t -> t -> Parser t t
-string' consumed rest =
-   get >>= \i->
-   let !(p, s', i') = stripCommonPrefix rest i
-   in if null s'
-      then put i' >> return (consumed <> rest)
-      else if null i'
-           then put mempty
-                >> demandInput
-                >> string' (consumed <> p) s'
-           else put (consumed <> i) 
-                >> fail "string"
-
-stringTransform :: (FactorialMonoid t, Eq t) => (t -> t) -> t
-                -> Parser t t
-stringTransform f s = takeWith (Factorial.length s) ((==f s) . f)
-{-# INLINE stringTransform #-}
-
--- | Skip past input for as long as the predicate returns 'True'.
-skipWhile :: FactorialMonoid t => (t -> Bool) -> Parser t ()
-skipWhile p = go
- where
-  go = do
-    t <- Factorial.dropWhile p <$> get
-    put t
-    when (null t) $ do
-      input <- wantMoreInput
-      when input go
-{-# INLINE skipWhile #-}
-
--- | Consume input as long as the predicate returns 'False'
--- (i.e. until it returns 'True'), and return the consumed input.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'True' on the first input token.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-takeTill :: FactorialMonoid t => (t -> Bool) -> Parser t t
-takeTill p = takeWhile (not . p)
-{-# INLINE takeTill #-}
-
--- | Consume input characters as long as the predicate returns 'False'
--- (i.e. until it returns 'True'), and return the consumed input.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'True' on the first input token.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-takeCharsTill :: TextualMonoid t => (Char -> Bool) -> Parser t t
-takeCharsTill p = takeCharsWhile (not . p)
-
--- | Consume all input until the character for which the predicate 
--- returns 'True' and return the consumed input.
---
--- The only difference between 'takeCharsTill' and 'takeTillChar' is
--- in their handling of non-character data: The former never consumes
--- it, the latter always does.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'True' on the first input token.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-takeTillChar :: TextualMonoid t => (Char -> Bool) -> Parser t t
-takeTillChar p = go id
- where
-  go acc = do
-    (h,t) <- Textual.break (const False) p <$> get
-    put t
-    if null t
-      then do
-        input <- wantInput
-        if input
-          then go (acc . mappend h)
-          else return (acc h)
-      else return (acc h)
-{-# INLINE takeTillChar #-}
-
--- | Consume all input until the character for which the predicate 
--- returns 'True' and return the consumed input.
---
--- This parser always consumes at least one token: it will fail if the 
--- input starts with a character for which the predicate returns 
--- 'True' or if there is no input left.
-takeTillChar1 :: TextualMonoid t => (Char -> Bool) -> Parser t t
-takeTillChar1 p = do
-  (`when` demandInput) =<< null <$> get
-  (h,t) <- Textual.break (const False) p <$> get
-  when (null h && maybe True p (Textual.characterPrefix t)) $ fail "takeTillChar1"
-  put t
-  if null t
-    then (h<>) <$> takeTillChar p
-    else return h
-{-# INLINE takeTillChar1 #-}
-
--- | Consume input as long as the predicate returns 'True', and return
--- the consumed input.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'False' on the first input token.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-takeWhile :: FactorialMonoid t => (t -> Bool) -> Parser t t
-takeWhile p = go id
- where
-  go acc = do
-    (h,t) <- Factorial.span p <$> get
-    put t
-    if null t
-      then do
-        input <- wantMoreInput
-        if input
-          then go (acc . mappend h)
-          else return (acc h)
-      else return (acc h)
-{-# INLINE takeWhile #-}
-
--- | Consume input characters as long as the predicate returns 'True', 
--- and return the consumed input.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'False' on the first input token.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-takeCharsWhile :: TextualMonoid t => (Char -> Bool) -> Parser t t
-takeCharsWhile p = go id
- where
-  go acc = do
-    (h,t) <- Textual.span (const False) p <$> get
-    put t
-    if null t
-      then do
-        input <- wantMoreInput
-        if input
-          then go (acc . mappend h)
-          else return (acc h)
-      else return (acc h)
-{-# INLINE takeCharsWhile #-}
-
--- | Consume all remaining input and return it as a single string.
-takeRest :: MonoidNull t => Parser t t
-takeRest = go []
- where
-  go acc = do
-    input <- wantInput
-    if input
-      then do
-        s <- get
-        put mempty
-        go (s:acc)
-      else return (mconcat $ reverse acc)
-
--- | Consume input as long as the predicate returns 'True', and return
--- the consumed input.
---
--- This parser requires the predicate to succeed on at least one input
--- token: it will fail if the predicate never returns 'True'
--- or if there is no input left.
-takeWhile1 :: FactorialMonoid t => (t -> Bool) -> Parser t t
-takeWhile1 p = do
-  (`when` demandInput) =<< null <$> get
-  (h,t) <- Factorial.span p <$> get
-  when (null h) $ fail "takeWhile1"
-  put t
-  if null t
-    then (h<>) `fmap` takeWhile p
-    else return h
-
-takeCharsWhile1 :: TextualMonoid t => (Char -> Bool) -> Parser t t
-takeCharsWhile1 p = do
-  (`when` demandInput) =<< null <$> get
-  (h,t) <- Textual.span (const False) p <$> get
-  when (null h) $ fail "takeCharsWhile1"
-  put t
-  if null t
-    then (h<>) `fmap` takeCharsWhile p
-    else return h
-
-
--- | Match any prime input token.
-anyToken :: FactorialMonoid t => Parser t t
-anyToken = satisfy $ const True
-{-# INLINE anyToken #-}
-
--- | Match any prime input token. Returns 'mempty' if end of input
--- has been reached. Does not consume any input.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-peekToken :: FactorialMonoid t => Parser t t
-peekToken = T.Parser $ \i0 a0 m0 _kf ks ->
-            if null (unI i0)
-            then if m0 == Complete
-                 then ks i0 a0 m0 mempty
-                 else let k' i a m = ks i a m $! Factorial.primePrefix (unI i)
-                      in prompt i0 a0 m0 k' k'
-            else let !w = Factorial.primePrefix (unI i0)
-                 in ks i0 a0 m0 w
-{-# INLINE peekToken #-}
-
--- | Match any character.
-anyChar :: TextualMonoid t => Parser t Char
-anyChar = satisfyChar $ const True
-{-# INLINE anyChar #-}
-
--- | Match a specific character.
-char :: TextualMonoid t => Char -> Parser t Char
-char c = satisfyChar (== c) <?> show c
-{-# INLINE char #-}
-
--- | Match either a single newline character @\'\\n\'@, or a carriage
--- return followed by a newline character @\"\\r\\n\"@.
-endOfLine :: (Eq t, TextualMonoid t) => Parser t ()
-endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())
-
--- | Terminal failure continuation.
-failK :: Failure t a
-failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg
-{-# INLINE failK #-}
-
--- | Terminal success continuation.
-successK :: Success t a a
-successK i0 _a0 _m0 a = Done (unI i0) a
-{-# INLINE successK #-}
-
--- | Run a parser.
-parse :: Monoid t => Parser t a -> t -> IResult t a
-parse m s = T.runParser m (I s) mempty Incomplete failK successK
-{-# INLINE parse #-}
-
--- | Run a parser that cannot be resupplied via a 'Partial' result.
-parseOnly :: Monoid t => Parser t a -> t -> Either String a
-parseOnly m s = case T.runParser m (I s) mempty Complete failK successK of
-                  Fail _ _ err -> Left err
-                  Done _ a     -> Right a
-                  _            -> error "parseOnly: impossible error!"
-{-# INLINE parseOnly #-}

File Data/Attoparsec/Number.hs

-{-# LANGUAGE DeriveDataTypeable #-}
--- |
--- Module      :  Data.Attoparsec.Number
--- Copyright   :  Bryan O'Sullivan 2011
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- A simple number type, useful for parsing both exact and inexact
--- quantities without losing much precision.
-module Data.Attoparsec.Number ( Number(..) ) where
-
-import Control.DeepSeq (NFData(rnf))
-import Data.Data (Data)
-import Data.Function (on)
-import Data.Typeable (Typeable)
-
--- | A numeric type that can represent integers accurately, and
--- floating point numbers to the precision of a 'Double'.
-data Number = I !Integer
-            | D {-# UNPACK #-} !Double
-              deriving (Typeable, Data)
-
-instance Show Number where
-    show (I a) = show a
-    show (D a) = show a
-
-instance NFData Number where
-    rnf (I _) = ()
-    rnf (D _) = ()
-    {-# INLINE rnf #-}
-
-binop :: (Integer -> Integer -> a) -> (Double -> Double -> a)
-      -> Number -> Number -> a
-binop _ d (D a) (D b) = d a b
-binop i _ (I a) (I b) = i a b
-binop _ d (D a) (I b) = d a (fromIntegral b)
-binop _ d (I a) (D b) = d (fromIntegral a) b
-{-# INLINE binop #-}
-
-instance Eq Number where
-    (==) = binop (==) (==)
-    {-# INLINE (==) #-}
-
-    (/=) = binop (/=) (/=)
-    {-# INLINE (/=) #-}
-
-instance Ord Number where
-    (<) = binop (<) (<)
-    {-# INLINE (<) #-}
-
-    (<=) = binop (<=) (<=)
-    {-# INLINE (<=) #-}
-
-    (>) = binop (>) (>)
-    {-# INLINE (>) #-}
-
-    (>=) = binop (>=) (>=)
-    {-# INLINE (>=) #-}
-
-    compare = binop compare compare
-    {-# INLINE compare #-}
-
-instance Num Number where
-    (+) = binop (((I$!).) . (+)) (((D$!).) . (+))
-    {-# INLINE (+) #-}
-
-    (-) = binop (((I$!).) . (-)) (((D$!).) . (-))
-    {-# INLINE (-) #-}
-
-    (*) = binop (((I$!).) . (*)) (((D$!).) . (*))
-    {-# INLINE (*) #-}
-
-    abs (I a) = I $! abs a
-    abs (D a) = D $! abs a
-    {-# INLINE abs #-}
-
-    negate (I a) = I $! negate a
-    negate (D a) = D $! negate a
-    {-# INLINE negate #-}
-
-    signum (I a) = I $! signum a
-    signum (D a) = D $! signum a
-    {-# INLINE signum #-}
-
-    fromInteger = (I$!) . fromInteger
-    {-# INLINE fromInteger #-}
-
-instance Real Number where
-    toRational (I a) = fromIntegral a
-    toRational (D a) = toRational a
-    {-# INLINE toRational #-}
-
-instance Fractional Number where
-    fromRational = (D$!) . fromRational
-    {-# INLINE fromRational #-}
-
-    (/) = binop (((D$!).) . (/) `on` fromIntegral)
-                (((D$!).) . (/))
-    {-# INLINE (/) #-}
-
-    recip (I a) = D $! recip (fromIntegral a)
-    recip (D a) = D $! recip a
-    {-# INLINE recip #-}
-
-instance RealFrac Number where
-    properFraction (I a) = (fromIntegral a,0)
-    properFraction (D a) = case properFraction a of
-                             (i,d) -> (i,D d)
-    {-# INLINE properFraction #-}
-    truncate (I a) = fromIntegral a
-    truncate (D a) = truncate a
-    {-# INLINE truncate #-}
-    round (I a) = fromIntegral a
-    round (D a) = round a
-    {-# INLINE round #-}
-    ceiling (I a) = fromIntegral a
-    ceiling (D a) = ceiling a
-    {-# INLINE ceiling #-}
-    floor (I a) = fromIntegral a
-    floor (D a) = floor a
-    {-# INLINE floor #-}

File Data/Attoparsec/Text/FastSet.hs

------------------------------------------------------------------------------
--- |
--- Module      :  Data.Attoparsec.FastSet
--- Copyright   :  Felipe Lessa 2010, Bryan O'Sullivan 2008
--- License     :  BSD3
---
--- Maintainer  :  felipe.lessa@gmail.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Fast set membership tests for 'Char' values.  The set
--- representation is unboxed for efficiency.  We test for
--- membership using a binary search.
---
------------------------------------------------------------------------------
-module Data.Attoparsec.Text.FastSet
-    (
-    -- * Data type
-      FastSet
-    -- * Construction
-    , fromList
-    , set
-    -- * Lookup
-    , member
-    -- * Handy interface
-    , charClass
-    ) where
-
-import Data.List (sort)
-import qualified Data.Array.Base as AB
-import qualified Data.Array.Unboxed as A
-import qualified Data.Text as T
-
-newtype FastSet = FastSet (A.UArray Int Char)
-    deriving (Eq, Ord, Show)
-
--- | Create a set.
-set :: T.Text -> FastSet
-set t = mkSet (T.length t) (sort $ T.unpack t)
-
-fromList :: [Char] -> FastSet
-fromList cs = mkSet (length cs) (sort cs)
-
-mkSet :: Int -> [Char] -> FastSet
-mkSet l = FastSet . A.listArray (0,l-1)
-
--- | Check the set for membership.
-member :: Char -> FastSet -> Bool
-member c (FastSet a) = uncurry search (A.bounds a)
-    where search lo hi
-              | hi < lo = False
-              | otherwise =
-                  let mid = (lo + hi) `quot` 2
-                  in case compare c (AB.unsafeAt a mid) of
-                       GT -> search (mid + 1) hi
-                       LT -> search lo (mid - 1)
-                       _ -> True
-
-charClass :: String -> FastSet
-charClass = fromList . go
-    where go (a:'-':b:xs) = [a..b] ++ go xs
-          go (x:xs) = x : go xs
-          go _ = ""

File Data/Attoparsec/Types.hs

--- |
--- Module      :  Data.Attoparsec.Types
--- Copyright   :  Bryan O'Sullivan 2011
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators for strings, loosely based on
--- the Parsec library.
-
-module Data.Attoparsec.Types
-    (
-      Parser
-    , IResult(..)
-    , Chunk(..)
-    ) where
-
-import Data.Attoparsec.Internal.Types (Parser(..), IResult(..), Chunk(..))

File Data/Attoparsec/Zepto.hs

-{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples #-}
-
--- |
--- Module      :  Data.Attoparsec.Zepto
--- Copyright   :  Bryan O'Sullivan 2011
--- License     :  BSD3
---
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- A tiny, highly specialized combinator parser for 'B.ByteString'
--- strings.
---
--- While the main Attoparsec module generally performs well, this
--- module is particularly fast for simple non-recursive loops that
--- should not normally result in failed parses.
---
--- /Warning/: on more complex inputs involving recursion or failure,
--- parsers based on this module may be as much as /ten times slower/
--- than regular Attoparsec! You should /only/ use this module when you
--- have benchmarks that prove that its use speeds your code up.
-module Data.Attoparsec.Zepto
-    (
-      Parser
-    , parse
-    , atEnd
-    , string
-    , take
-    , takeWhile
-    ) where
-
-import Data.Word (Word8)
-import Control.Applicative
-import Control.Monad
-import Data.Monoid
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Unsafe as B
-import Data.ByteString (ByteString)
-import Prelude hiding (take, takeWhile)
-
-newtype S = S {
-      input :: ByteString
-    }
-
-data Result a = Fail String
-              | OK !a
-
--- | A simple parser.
---
--- This monad is strict in its state, and the monadic bind operator
--- ('>>=') evaluates each result to weak head normal form before
--- passing it along.
-newtype Parser a = Parser {
-      runParser :: S -> (# Result a, S #)
-    }
-
-instance Functor Parser where
-    fmap f m = Parser $ \s -> case runParser m s of
-                                (# OK a, s' #)     -> (# OK (f a), s' #)
-                                (# Fail err, s' #) -> (# Fail err, s' #)
-    {-# INLINE fmap #-}
-
-instance Monad Parser where
-    return a = Parser $ \s -> (# OK a, s #)
-    {-# INLINE return #-}
-
-    m >>= k   = Parser $ \s -> case runParser m s of
-                                 (# OK a, s' #) -> runParser (k a) s'
-                                 (# Fail err, s' #) -> (# Fail err, s' #)
-    {-# INLINE (>>=) #-}
-
-    fail msg = Parser $ \s -> (# Fail msg, s #)
-
-instance MonadPlus Parser where
-    mzero = fail "mzero"
-    {-# INLINE mzero #-}
-
-    mplus a b = Parser $ \s ->
-                case runParser a s of
-                  (# ok@(OK _), s' #) -> (# ok, s' #)
-                  (# _, _ #) -> case runParser b s of
-                                   (# ok@(OK _), s'' #) -> (# ok, s'' #)
-                                   (# err, s'' #) -> (# err, s'' #)
-    {-# INLINE mplus #-}
-
-instance Applicative Parser where
-    pure   = return
-    {-# INLINE pure #-}
-    (<*>)  = ap
-    {-# INLINE (<*>) #-}
-
-gets :: (S -> a) -> Parser a
-gets f = Parser $ \s -> (# OK (f s), s #)
-{-# INLINE gets #-}
-
-put :: S -> Parser ()
-put s = Parser $ \_ -> (# OK (), s #)
-{-# INLINE put #-}
-
--- | Run a parser.
-parse :: Parser a -> ByteString -> Either String a
-parse p bs = case runParser p (S bs) of
-               (# OK a, _ #) -> Right a
-               (# Fail err, _ #) -> Left err
-
-instance Monoid (Parser a) where
-    mempty  = fail "mempty"
-    {-# INLINE mempty #-}
-    mappend = mplus
-    {-# INLINE mappend #-}
-
-instance Alternative Parser where
-    empty = fail "empty"
-    {-# INLINE empty #-}
-    (<|>) = mplus
-    {-# INLINE (<|>) #-}
-
--- | Consume input while the predicate returns 'True'.
-takeWhile :: (Word8 -> Bool) -> Parser ByteString
-takeWhile p = do
-  (h,t) <- gets (B.span p . input)
-  put (S t)
-  return h
-{-# INLINE takeWhile #-}
-
--- | Consume @n@ bytes of input.
-take :: Int -> Parser ByteString
-take !n = do
-  s <- gets input
-  if B.length s >= n
-    then put (S (B.unsafeDrop n s)) >> return (B.unsafeTake n s)
-    else fail "insufficient input"
-{-# INLINE take #-}
-
--- | Match a string exactly.
-string :: ByteString -> Parser ()
-string s = do
-  i <- gets input
-  if s `B.isPrefixOf` i
-    then put (S (B.unsafeDrop (B.length s) i)) >> return ()
-    else fail "string"
-{-# INLINE string #-}
-
--- | Indicate whether the end of the input has been reached.
-atEnd :: Parser Bool
-atEnd = do
-  i <- gets input
-  return $! B.null i
-{-# INLINE atEnd #-}

File Data/Picoparsec.hs

+-- |
+-- Module      :  Data.Picoparsec
+-- Copyright   :  Bryan O'Sullivan 2007-2011, Mario Blažević <blamario@yahoo.com> 2014
+-- License     :  BSD3
+--
+-- Maintainer  :  Mario Blažević
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient combinator parsing for 'ByteString' strings,
+-- loosely based on the Parsec library.
+
+module Data.Picoparsec
+    (
+      module Data.Picoparsec.Monoid
+    ) where
+
+import Data.Picoparsec.Monoid

File Data/Picoparsec/ByteString/FastSet.hs

+{-# LANGUAGE BangPatterns, MagicHash #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Picoparsec.ByteString.FastSet
+-- Copyright   :  Bryan O'Sullivan 2008, Mario Blažević <blamario@yahoo.com> 2014
+-- License     :  BSD3
+--
+-- Maintainer  :  Mario Blažević
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The
+-- set representation is unboxed for efficiency.  For small sets, we
+-- test for membership using a binary search.  For larger sets, we use
+-- a lookup table.
+--
+-----------------------------------------------------------------------------
+module Data.Picoparsec.ByteString.FastSet
+    (
+    -- * Data type
+      FastSet
+    -- * Construction
+    , fromList
+    , set
+    -- * Lookup
+    , memberChar
+    , memberWord8
+    -- * Debugging
+    , fromSet
+    -- * Handy interface
+    , charClass
+    ) where
+
+import Data.Bits ((.&.), (.|.))
+import Foreign.Storable (peekByteOff, pokeByteOff)
+import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#)
+import GHC.Word (Word8(W8#))
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Internal as I
+import qualified Data.ByteString.Unsafe as U
+
+data FastSet = Sorted { fromSet :: !B.ByteString }
+             | Table  { fromSet :: !B.ByteString }
+    deriving (Eq, Ord)
+
+instance Show FastSet where
+    show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s)
+    show (Table _) = "FastSet Table"
+
+-- | The lower bound on the size of a lookup table.  We choose this to
+-- balance table density against performance.
+tableCutoff :: Int
+tableCutoff = 8
+
+-- | Create a set.
+set :: B.ByteString -> FastSet
+set s | B.length s < tableCutoff = Sorted . B.sort $ s
+      | otherwise                = Table . mkTable $ s
+
+fromList :: [Word8] -> FastSet
+fromList = set . B.pack
+
+data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8
+
+shiftR :: Int -> Int -> Int
+shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
+
+shiftL :: Word8 -> Int -> Word8
+shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#))
+
+index :: Int -> I
+index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7))
+{-# INLINE index #-}
+
+-- | Check the set for membership.
+memberWord8 :: Word8 -> FastSet -> Bool
+memberWord8 w (Table t)  =
+    let I byte bit = index (fromIntegral w)
+    in  U.unsafeIndex t byte .&. bit /= 0
+memberWord8 w (Sorted s) = search 0 (B.length s - 1)
+    where search lo hi
+              | hi < lo = False
+              | otherwise =
+                  let mid = (lo + hi) `quot` 2
+                  in case compare w (U.unsafeIndex s mid) of
+                       GT -> search (mid + 1) hi
+                       LT -> search lo (mid - 1)
+                       _ -> True
+
+-- | Check the set for membership.  Only works with 8-bit characters:
+-- characters above code point 255 will give wrong answers.
+memberChar :: Char -> FastSet -> Bool
+memberChar c = memberWord8 (I.c2w c)
+{-# INLINE memberChar #-}
+
+mkTable :: B.ByteString -> B.ByteString
+mkTable s = I.unsafeCreate 32 $ \t -> do
+            _ <- I.memset t 0 32
+            U.unsafeUseAsCStringLen s $ \(p, l) ->
+              let loop n | n == l = return ()
+                         | otherwise = do
+                    c <- peekByteOff p n :: IO Word8
+                    let I byte bit = index (fromIntegral c)
+                    prev <- peekByteOff t byte :: IO Word8
+                    pokeByteOff t byte (prev .|. bit)
+                    loop (n + 1)
+              in loop 0
+
+charClass :: String -> FastSet
+charClass = set . B8.pack . go
+    where go (a:'-':b:xs) = [a..b] ++ go xs
+          go (x:xs) = x : go xs
+          go _ = ""

File Data/Picoparsec/Combinator.hs