Commits

Bryan O'Sullivan committed 3c60d56

Rename Data.ParserCombinators.Attoparsec to Data.Attoparsec

Comments (0)

Files changed (19)

Data/Attoparsec.hs

+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Attoparsec
+-- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient parser combinators for lazy 'LB.ByteString'
+-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+-- 
+-----------------------------------------------------------------------------
+module Data.Attoparsec
+    (
+    -- * Parser
+      ParseError
+    , Parser
+
+    -- * Running parsers
+    , parse
+    , parseAt
+    , parseTest
+
+    -- * Combinators
+    , (<?>)
+
+    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
+    , try
+    , eof
+    , lookAhead
+    , peek
+
+    -- * Things like in @Parsec.Char@
+    , satisfy
+    , anyWord8
+    , word8
+    , notWord8
+    , string
+    , stringTransform
+
+    -- * Parser converters.
+    , eitherP
+
+    -- * Miscellaneous functions.
+    , getInput
+    , getConsumed
+    , takeWhile
+    , takeWhile1
+    , takeTill
+    , takeAll
+    , skipWhile
+    , notEmpty
+    , match
+    ) where
+
+import Data.Attoparsec.Internal
+import Prelude hiding (takeWhile)

Data/Attoparsec/Char8.hs

+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Attoparsec.Char8
+-- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient parser combinators for lazy 'LB.ByteString'
+-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+-- 
+-----------------------------------------------------------------------------
+module Data.Attoparsec.Char8
+    (
+    -- * Parser
+      ParseError
+    , Parser
+
+    -- * Running parsers
+    , parse
+    , parseAt
+    , parseTest
+
+    -- * Combinators
+    , (<?>)
+
+    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
+    , try
+    , eof
+    , lookAhead
+    , peek
+
+    -- * Things like in @Parsec.Char@
+    , satisfy
+    , letter
+    , digit
+    , anyChar
+    , space
+    , char
+    , notChar
+    , string
+    , stringCI
+
+    -- * Parser converters.
+    , eitherP
+
+    -- * Numeric parsers.
+    , int
+    , integer
+    , double
+
+    -- * Miscellaneous functions.
+    , getInput
+    , getConsumed
+    , takeWhile
+    , takeWhile1
+    , takeTill
+    , takeAll
+    , takeCount
+    , skipWhile
+    , skipSpace
+    , notEmpty
+    , match
+    , inClass
+    , notInClass
+    , endOfLine
+    ) where
+
+import Control.Applicative ((<$>))
+import qualified Data.ByteString.Char8 as SB
+import qualified Data.ByteString.Lazy.Char8 as LB
+import Data.ByteString.Internal (w2c)
+import Data.Char (isDigit, isLetter, isSpace, toLower)
+import Data.Attoparsec.FastSet
+    (FastSet, memberChar, set)
+import qualified Data.Attoparsec.Internal as I
+import Data.Attoparsec.Internal
+    (Parser, ParseError, (<?>), parse, parseAt, parseTest, try, eof,
+     lookAhead, peek, string,
+     eitherP, getInput, getConsumed, takeAll, takeCount, notEmpty, match,
+     endOfLine, setInput)
+import Data.ByteString.Lex.Lazy.Double (readDouble)
+import Prelude hiding (takeWhile)
+
+-- | Satisfy a literal string, ignoring case.
+stringCI :: LB.ByteString -> Parser LB.ByteString
+stringCI = I.stringTransform (LB.map toLower)
+{-# INLINE stringCI #-}
+
+takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString
+takeWhile1 p = I.takeWhile1 (p . w2c)
+{-# INLINE takeWhile1 #-}
+
+numeric :: String -> (LB.ByteString -> Maybe (a,LB.ByteString)) -> Parser a
+numeric desc f = do
+  s <- getInput
+  case f s of
+    Nothing -> fail desc
+    Just (i,s') -> setInput s' >> return i
+                   
+-- | Parse an integer.  The position counter is not updated.
+int :: Parser Int
+int = numeric "Int" LB.readInt
+
+-- | Parse an integer.  The position counter is not updated.
+integer :: Parser Integer
+integer = numeric "Integer" LB.readInteger
+
+-- | Parse a Double.  The position counter is not updated.
+double :: Parser Double
+double = numeric "Double" readDouble
+
+#define PARSER Parser
+#include "Char8Boilerplate.h"

Data/Attoparsec/Char8Boilerplate.h

+-- -*- haskell -*-
+
+-- | Character parser.
+satisfy :: (Char -> Bool) -> PARSER Char
+satisfy p = w2c <$> I.satisfy (p . w2c)
+{-# INLINE satisfy #-}
+
+letter :: PARSER Char
+letter = satisfy isLetter <?> "letter"
+{-# INLINE letter #-}
+
+digit :: PARSER Char
+digit = satisfy isDigit <?> "digit"
+{-# INLINE digit #-}
+
+anyChar :: PARSER Char
+anyChar = satisfy $ const True
+{-# INLINE anyChar #-}
+
+space :: PARSER Char
+space = satisfy isSpace <?> "space"
+{-# INLINE space #-}
+
+-- | Satisfy a specific character.
+char :: Char -> PARSER Char
+char c = satisfy (== c) <?> [c]
+{-# INLINE char #-}
+
+-- | Satisfy a specific character.
+notChar :: Char -> PARSER Char
+notChar c = satisfy (/= c) <?> "not " ++ [c]
+{-# INLINE notChar #-}
+
+charClass :: String -> FastSet
+charClass = set . SB.pack . go
+    where go (a:'-':b:xs) = [a..b] ++ go xs
+          go (x:xs) = x : go xs
+          go _ = ""
+
+inClass :: String -> Char -> Bool
+inClass s = (`memberChar` myset)
+    where myset = charClass s
+{-# INLINE inClass #-}
+
+notInClass :: String -> Char -> Bool
+notInClass s = not . inClass s
+{-# INLINE notInClass #-}
+
+-- | Consume characters while the predicate is true.
+takeWhile :: (Char -> Bool) -> PARSER LB.ByteString
+takeWhile p = I.takeWhile (p . w2c)
+{-# INLINE takeWhile #-}
+
+takeTill :: (Char -> Bool) -> PARSER LB.ByteString
+takeTill p = I.takeTill (p . w2c)
+{-# INLINE takeTill #-}
+
+-- | Skip over characters while the predicate is true.
+skipWhile :: (Char -> Bool) -> PARSER ()
+skipWhile p = I.skipWhile (p . w2c)
+{-# INLINE skipWhile #-}
+
+-- | Skip over white space.
+skipSpace :: PARSER ()
+skipSpace = takeWhile isSpace >> return ()
+{-# INLINE skipSpace #-}

Data/Attoparsec/Combinator.hs

+{-# LANGUAGE BangPatterns, CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Attoparsec.Combinator
+-- Copyright   :  Bryan O'Sullivan 2009
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Useful parser combinators, similar to Parsec.
+-- 
+-----------------------------------------------------------------------------
+module Data.Attoparsec.Combinator
+    (
+      choice
+    , count
+    , many
+    , many1
+    , manyTill
+    , sepBy
+    , sepBy1
+    , skipMany
+    , skipMany1
+    ) where
+
+import Control.Applicative
+
+choice :: Alternative f => [f a] -> f a
+choice = foldr (<|>) empty
+
+many1 :: Alternative f => f a -> f [a]
+many1 p = liftA2 (:) p (many p)
+
+sepBy :: Alternative f => f a -> f s -> f [a]
+sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure []
+
+sepBy1 :: Alternative f => f a -> f s -> f [a]
+sepBy1 p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure [])
+
+manyTill :: Alternative f => f a -> f b -> f [a]
+manyTill p end = scan
+    where scan = (end *> pure []) <|> liftA2 (:) p scan
+
+-- | Skip zero or more instances of the parser.
+skipMany :: Alternative f => f a -> f ()
+skipMany p = scan
+    where scan = (p *> scan) <|> pure ()
+
+-- | Skip one or more instances of the parser.
+skipMany1 :: Alternative f => f a -> f ()
+skipMany1 p = p *> skipMany p
+
+-- | Apply the given parser repeatedly, returning every parse result.
+count :: Monad m => Int -> m a -> m [a]
+count n p = sequence (replicate n p)
+{-# INLINE count #-}

Data/Attoparsec/FastSet.hs

+{-# LANGUAGE BangPatterns #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Attoparsec.FastSet
+-- Copyright   :  Bryan O'Sullivan 2008
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The
+-- set representation is unboxed for efficiency.  For sets of fewer
+-- than 32 elements, we test for membership using a binary search.
+-- For larger sets, we use a lookup table.
+-- 
+-----------------------------------------------------------------------------
+module Data.Attoparsec.FastSet
+    (
+    -- * Data type
+      FastSet
+    -- * Construction
+    , fromList
+    , set
+    -- * Lookup
+    , memberChar
+    , memberWord8
+    -- * Debugging
+    , fromSet
+    ) where
+
+import Data.Bits ((.&.), (.|.), shiftL, shiftR)
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Internal as I
+import qualified Data.ByteString.Unsafe as U
+import Data.Word (Word8)
+import Foreign.Storable (peekByteOff, pokeByteOff)
+
+data FastSet = Sorted { fromSet :: {-# UNPACK #-} !B.ByteString }
+             | Table  { fromSet :: {-# UNPACK #-} !B.ByteString }
+    deriving (Eq, Ord)
+
+instance Show FastSet where
+    show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s)
+    show (Table _) = "FastSet Table"
+
+-- | The lower bound on the size of a lookup table.  We choose this to
+-- balance table density against performance.
+tableCutoff :: Int
+tableCutoff = 8
+
+-- | Create a set.
+set :: B.ByteString -> FastSet
+set s | B.length s < tableCutoff = Sorted . B.sort $ s
+      | otherwise                = Table . mkTable $ s
+
+fromList :: [Word8] -> FastSet
+fromList = set . B.pack
+
+index :: Int -> (Int, Word8)
+index i = (i `shiftR` 3, 1 `shiftL` (i .&. 7))
+
+-- | Check the set for membership.
+memberWord8 :: Word8 -> FastSet -> Bool
+memberWord8 w (Table t)  =
+    let (byte,bit) = index (fromIntegral w)
+    in U.unsafeIndex t byte .&. bit /= 0
+memberWord8 w (Sorted s) = search 0 (B.length s - 1)
+    where search lo hi
+              | hi < lo = False
+              | otherwise =
+                  let mid = (lo + hi) `div` 2
+                  in case compare w (U.unsafeIndex s mid) of
+                       GT -> search (mid + 1) hi
+                       LT -> search lo (mid - 1)
+                       _ -> True
+
+-- | Check the set for membership.  Only works with 8-bit characters:
+-- characters above code point 255 will give wrong answers.
+memberChar :: Char -> FastSet -> Bool
+memberChar c = memberWord8 (I.c2w c)
+
+mkTable :: B.ByteString -> B.ByteString
+mkTable s = I.unsafeCreate 32 $ \t -> do
+            I.memset t 0 32
+            U.unsafeUseAsCStringLen s $ \(p, l) ->
+              let loop n | n == l = return ()
+                         | otherwise = do
+                    c <- peekByteOff p n :: IO Word8
+                    let (byte,bit) = index (fromIntegral c)
+                    prev <- peekByteOff t byte :: IO Word8
+                    pokeByteOff t byte (prev .|. bit)
+                    loop (n + 1)
+              in loop 0

Data/Attoparsec/Incremental.hs

+{-# LANGUAGE BangPatterns, CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Attoparsec.Incremental
+-- Copyright   :  Bryan O'Sullivan 2009
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient parser combinators for lazy 'LB.ByteString'
+-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+--
+-- This module is heavily influenced by Adam Langley's incremental
+-- parser in his binary-strict package.
+-- 
+-----------------------------------------------------------------------------
+module Data.Attoparsec.Incremental
+    (
+      Parser
+    , Result(..)
+    , parse
+    , parseWith
+    , parseTest
+
+    , (<?>)
+    , takeWhile
+    , takeTill
+    , takeCount
+    , string
+    , satisfy
+    , pushBack
+
+    , word8
+    , notWord8
+    , anyWord8
+
+    , skipWhile
+
+    , yield
+    ) where
+
+import Control.Applicative
+import Control.Monad (MonadPlus(..), ap)
+import Data.Attoparsec.Internal ((+:))
+import Data.Word (Word8)
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString.Lazy.Internal as L
+import Prelude hiding (takeWhile)
+
+data S = S {-# UNPACK #-} !S.ByteString -- ^ first chunk of input
+           L.ByteString                 -- ^ rest of input
+           [L.ByteString]               -- ^ input acquired during backtracks
+           !Bool                        -- ^ have we hit EOF yet?
+           {-# UNPACK #-} !Int          -- ^ failure depth
+
+-- | The result of a partial parse
+data Result a = Failed String
+                -- ^ the parse failed with the given error message
+              | Done L.ByteString a
+                -- ^ the parse finished and produced the given list of
+                --   results doing so. Any unparsed data is returned.
+              | Partial (L.ByteString -> Result a)
+                -- ^ the parse ran out of data before finishing, but produced
+                --   the given list of results before doing so. To continue the
+                --   parse pass more data to the given continuation
+
+instance (Show a) => Show (Result a) where
+  show (Failed err)      = "Failed " ++ show err
+  show (Done L.Empty rs) = "Done Empty " ++ show rs
+  show (Done rest rs)    = "Done (" ++ show rest ++ ") " ++ show rs
+  show (Partial _)       = "Partial"
+
+-- | This is the internal version of the above. This is the type which is
+--   actually used by the code, as it has the extra information needed
+--   for backtracking. This is converted to an external friendly @Result@
+--   type just before giving it to the outside world.
+data IResult a = IFailed S String
+               | IDone S a
+               | IPartial (L.ByteString -> IResult a)
+
+instance Show (IResult a) where
+  show (IFailed _ err) = "IFailed " ++ show err
+  show (IDone _ _)     = "IDone"
+  show (IPartial _)    = "IPartial"
+
+newtype Parser r a = Parser {
+      unParser :: S -> (a -> S -> IResult r) -> IResult r
+    }
+
+instance Monad (Parser r) where
+  return a = Parser $ \s k -> k a s
+  m >>= k = Parser $ \s cont -> unParser m s $ \a s' -> unParser (k a) s' cont
+  fail err = Parser $ \s -> const $ IFailed s err
+
+zero :: Parser r a
+zero = fail ""
+
+-- | I'm not sure if this is a huge bodge or not. It probably is.
+--
+-- When performing a choice (in @plus@), the failure depth in the
+-- current state is incremented. If a failure is generated inside the
+-- attempted path, the state carried in the IFailure will have this
+-- incremented failure depth. However, we don't want to backtrack
+-- after the attempted path has completed. Thus we insert this cut
+-- continuation, which decrements the failure count of any failure
+-- passing though, thus it would be caught in @plus@ and doesn't
+-- trigger a backtrack.
+cutContinuation :: (a -> S -> IResult r) -> a -> S -> IResult r
+cutContinuation k v s =
+  case k v s of
+       IFailed (S lb i adds eof failDepth) err -> IFailed (S lb i adds eof (failDepth - 1)) err
+       x -> x
+
+appL :: L.ByteString -> L.ByteString -> L.ByteString
+appL xs L.Empty = xs
+appL L.Empty ys = ys
+appL xs ys      = xs `L.append` ys
+
+plus :: Parser r a -> Parser r a -> Parser r a
+plus p1 p2 =
+  Parser $ \(S sb lb adds eof failDepth) k ->
+    let
+      filt f@(IFailed (S _ _ adds' eof' failDepth') _)
+        | failDepth' == failDepth + 1 =
+            let lb' = lb `appL` L.concat (reverse adds')
+            in  unParser p2 (S sb lb' (adds' ++ adds) eof' failDepth) k
+        | otherwise = f
+      filt (IPartial cont) = IPartial (filt . cont)
+      filt v@(IDone _ _) = v
+    in
+      filt $ unParser p1 (S sb lb [] eof (failDepth + 1)) (cutContinuation k)
+
+instance Functor (Parser r) where
+    fmap f m = Parser $ \s cont -> unParser m s (cont . f)
+
+infix 0 <?>
+
+-- | Name the parser.
+(<?>) :: Parser r a -> String -> Parser r a
+{-# INLINE (<?>) #-}
+p <?> msg =
+  Parser $ \st k ->
+    case unParser p st k of
+      IFailed st' _ -> IFailed st' msg
+      ok -> ok
+
+initState :: L.ByteString -> S
+initState (L.Chunk sb lb) = S sb lb [] False 0
+initState _               = S S.empty L.empty [] False 0
+
+mkState :: L.ByteString -> [L.ByteString] -> Bool -> Int -> S
+mkState bs adds eof failDepth =
+    case bs of
+      L.Empty -> S S.empty L.empty adds eof failDepth
+      L.Chunk sb lb -> S sb lb adds eof failDepth
+
+addX :: L.ByteString -> [L.ByteString] -> [L.ByteString]
+addX s adds | L.null s = adds
+            | otherwise = s : adds
+
+yield :: Parser r ()
+yield = Parser $ \(S sb lb adds eof failDepth) k ->
+  IPartial $ \s -> k () (S sb (lb `appL` s) (addX s adds) eof failDepth)
+
+continue :: (S -> IResult r) -> Parser r a
+         -> (a -> S -> IResult r) -> S -> IResult r
+continue onEOF p k (S _sb _lb adds eof failDepth) =
+    if eof
+    then onEOF (S S.empty L.empty adds True failDepth)
+    else IPartial $ \s -> let st = contState s adds failDepth
+                          in unParser p st k
+
+takeWith :: (L.ByteString -> (L.ByteString, L.ByteString))
+         -> Parser r L.ByteString
+takeWith splitf =
+  Parser $ \st@(S sb lb adds eof failDepth) k ->
+  let (left,rest) = splitf (sb +: lb)
+  in if L.null rest
+     then continue (k left) (takeWith splitf) (k . appL left) st
+     else k left (mkState rest adds eof failDepth)
+    
+takeWhile :: (Word8 -> Bool) -> Parser r L.ByteString
+takeWhile = takeWith . L.span
+
+takeTill :: (Word8 -> Bool) -> Parser r L.ByteString
+takeTill = takeWith . L.break
+
+takeCount :: Int -> Parser r L.ByteString
+takeCount = tc . fromIntegral where
+ tc n = Parser $ \st@(S sb lb adds eof failDepth) k ->
+        let (h,t) = L.splitAt n (sb +: lb)
+            l = L.length h
+        in if L.length h == n
+           then k h (mkState t adds eof failDepth)
+           else continue (`IFailed` "takeCount: EOF")
+                         (tc (n - l)) (k . appL h) st
+
+string :: L.ByteString -> Parser r L.ByteString
+string s =
+  Parser $ \st@(S sb lb adds eof failDepth) k ->
+    case L.splitAt (L.length s) (sb +: lb) of
+      (h,t)
+        | h == s -> k s (mkState t adds eof failDepth)
+      (h,L.Empty)
+        | h `L.isPrefixOf` s ->
+            continue (`IFailed` "string: EOF")
+                     (string (L.drop (L.length h) s))
+                     (k . appL h)
+                     st
+      _ -> IFailed st "string failed to match"
+
+contState :: L.ByteString -> [L.ByteString] -> Int -> S
+contState s adds failDepth
+    | L.null s  = S S.empty L.empty [] True failDepth
+    | otherwise = mkState s (addX s adds) False failDepth
+
+satisfy :: (Word8 -> Bool) -> Parser r Word8
+satisfy p =
+  Parser $ \st@(S sb lb adds eof failDepth) k ->
+    case S.uncons sb of
+      Just (w, sb') | p w -> k w (S sb' lb adds eof failDepth)
+                    | otherwise -> IFailed st "failed to match"
+      Nothing -> case L.uncons lb of
+                   Just (w, lb') | p w -> k w (mkState lb' adds eof failDepth)
+                                 | otherwise -> IFailed st "failed to match"
+                   Nothing -> continue (`IFailed` "satisfy: EOF")
+                                       (satisfy p) k st
+
+pushBack :: L.ByteString -> Parser r ()
+pushBack bs =
+    Parser $ \(S sb lb adds eof failDepth) k ->
+        k () (mkState (bs `appL` (sb +: lb)) adds eof failDepth)
+
+toplevelTranslate :: IResult a -> Result a
+toplevelTranslate (IFailed _ err) = Failed err
+toplevelTranslate (IDone (S sb lb _ _ _) value) = Done (sb +: lb) value
+toplevelTranslate (IPartial k) = Partial $ toplevelTranslate . k
+
+terminalContinuation :: a -> S -> IResult a
+terminalContinuation v s = IDone s v
+
+parse :: Parser r r -> L.ByteString -> Result r
+parse m input =
+  toplevelTranslate $ unParser m (initState input) terminalContinuation
+
+parseWith :: Applicative f => f L.ByteString -> Parser r r -> L.ByteString
+          -> f (Result r)
+parseWith refill p s =
+  case parse p s of
+    Partial k -> k <$> refill
+    ok        -> pure ok
+
+parseTest :: (Show r) => Parser r r -> L.ByteString -> IO ()
+parseTest p s = print (parse p s)
+
+#define PARSER Parser r
+#include "Word8Boilerplate.h"

Data/Attoparsec/Incremental/Char8.hs

+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Attoparsec.Incremental.Char8
+-- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient parser combinators for lazy 'LB.ByteString'
+-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+-- 
+-----------------------------------------------------------------------------
+module Data.Attoparsec.Incremental.Char8
+    (
+    -- * Parser
+      Parser
+    , Result(..)
+
+    -- * Running parsers
+    , parse
+
+    -- * Combinators
+    , (<?>)
+
+    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
+    , pushBack
+
+    -- * Things like in @Parsec.Char@
+    , satisfy
+    , letter
+    , digit
+    , anyChar
+    , space
+    , char
+    , notChar
+    , string
+
+    -- * Numeric parsers.
+    , int
+    , integer
+    , double
+
+    -- * Miscellaneous functions.
+    , takeWhile
+    , takeTill
+    , takeCount
+    , skipWhile
+    , skipSpace
+    , inClass
+    , notInClass
+    ) where
+
+import Control.Applicative ((<$>))
+import qualified Data.ByteString.Char8 as SB
+import qualified Data.ByteString.Lazy.Char8 as LB
+import Data.ByteString.Internal (w2c)
+import Data.Char (isDigit, isLetter, isSpace)
+import Data.Attoparsec.FastSet
+    (FastSet, memberChar, set)
+import qualified Data.Attoparsec.Incremental as I
+import Data.Attoparsec.Incremental
+    (Parser, Result(..), (<?>), parse, pushBack,
+     string, takeCount)
+import Data.ByteString.Lex.Lazy.Double (readDouble)
+import Prelude hiding (takeWhile)
+
+numeric :: String -> (Char -> Bool)
+         -> (LB.ByteString -> Maybe (a,LB.ByteString)) -> Parser r a
+numeric desc p f = do
+  s <- takeWhile p
+  case f s of
+    Nothing -> pushBack s >> fail desc
+    Just (i,s') -> pushBack s' >> return i
+                   
+isIntegral :: Char -> Bool
+isIntegral c = isDigit c || c == '-'
+
+-- | Parse an integer.  The position counter is not updated.
+int :: Parser r Int
+int = numeric "Int" isIntegral LB.readInt
+
+-- | Parse an integer.  The position counter is not updated.
+integer :: Parser r Integer
+integer = numeric "Integer" isIntegral LB.readInteger
+
+-- | Parse a Double.  The position counter is not updated.
+double :: Parser r Double
+double = numeric "Double" isDouble readDouble
+    where isDouble c = isIntegral c || c == 'e' || c == '+'
+
+#define PARSER Parser r
+#include "../Char8Boilerplate.h"

Data/Attoparsec/Internal.hs

+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Attoparsec.Internal
+-- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient parser combinators for lazy 'LB.ByteString'
+-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+-- 
+-----------------------------------------------------------------------------
+module Data.Attoparsec.Internal
+    (
+    -- * Parser
+      ParseError
+    , Parser
+
+    -- * Running parsers
+    , parse
+    , parseAt
+    , parseTest
+
+    -- * Combinators
+    , (<?>)
+
+    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
+    , try
+    , eof
+    , lookAhead
+    , peek
+
+    -- * Things like in @Parsec.Char@
+    , satisfy
+    , anyWord8
+    , word8
+    , notWord8
+    , string
+    , stringTransform
+
+    -- * Parser converters.
+    , eitherP
+
+    -- * Miscellaneous functions.
+    , getInput
+    , getConsumed
+    , setInput
+    , takeWhile
+    , takeWhile1
+    , takeTill
+    , takeAll
+    , takeCount
+    , skipWhile
+    , notEmpty
+    , match
+    , endOfLine
+
+    -- * Utilities.
+    , (+:)
+    ) where
+
+import Control.Applicative
+import Control.Monad (MonadPlus(..), ap)
+import Control.Monad.Fix (MonadFix(..))
+import qualified Data.ByteString as SB
+import qualified Data.ByteString.Lazy as LB
+import qualified Data.ByteString.Lazy.Char8 as L8
+import qualified Data.ByteString.Unsafe as U
+import qualified Data.ByteString.Internal as I
+import qualified Data.ByteString.Lazy.Internal as LB
+import Data.Int (Int64)
+import Data.Word (Word8)
+import Prelude hiding (takeWhile)
+
+type ParseError = String
+
+-- State invariants:
+-- * If both strict and lazy bytestrings are empty, the entire input
+--   is considered to be empty.
+data S = S {-# UNPACK #-} !SB.ByteString
+           LB.ByteString
+           {-# UNPACK #-} !Int64
+
+newtype Parser a = Parser {
+      unParser :: S -> Either (LB.ByteString, [String]) (a, S)
+    }
+
+instance Functor Parser where
+    fmap f p =
+        Parser $ \s ->
+            case unParser p s of
+              Right (a, s') -> Right (f a, s')
+              Left err -> Left err
+
+instance Monad Parser where
+    return a = Parser $ \s -> Right (a, s)
+    m >>= f = Parser $ \s ->
+              case unParser m s of
+                Right (a, s') -> unParser (f a) s'
+                Left (s', msgs) -> Left (s', msgs)
+    fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err])
+
+instance MonadFix Parser where
+    mfix f = Parser $ \s ->
+             let r = case r of
+                       Right (a, _) -> unParser (f a) s
+                       err -> err
+             in r
+
+zero :: Parser a
+zero = Parser $ \(S sb lb _) -> Left (sb +: lb, [])
+{-# INLINE zero #-}
+
+plus :: Parser a -> Parser a -> Parser a
+plus p1 p2 =
+    Parser $ \s@(S sb lb _) ->
+        case unParser p1 s of
+          Left (_, msgs1) -> 
+              case unParser p2 s of
+                Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2))
+                ok -> ok
+          ok -> ok
+{-# INLINE plus #-}
+
+mkState :: LB.ByteString -> Int64 -> S
+mkState s = case s of
+              LB.Empty -> S SB.empty s
+              LB.Chunk x xs -> S x xs
+
+-- | Turn our split representation back into a normal lazy ByteString.
+(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString
+sb +: lb | SB.null sb = lb
+         | otherwise = LB.Chunk sb lb
+{-# INLINE (+:) #-}
+
+infix 0 <?>
+
+-- | Name the parser.
+(<?>) :: Parser a -> String -> Parser a
+p <?> msg =
+    Parser $ \s@(S sb lb _) ->
+        case unParser p s of
+          (Left _) -> Left (sb +: lb, [msg])
+          ok -> ok
+{-# INLINE (<?>) #-}
+
+nextChunk :: Parser ()
+nextChunk = Parser $ \(S _ lb n) ->
+            case lb of
+              LB.Chunk sb' lb' -> Right ((), S sb' lb' n)
+              LB.Empty -> Left (lb, [])
+
+-- | Get remaining input.
+getInput :: Parser LB.ByteString
+getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s)
+
+-- | Set the remaining input.
+setInput :: LB.ByteString -> Parser ()
+setInput bs = Parser $ \(S _ _ n) -> Right ((), mkState bs n)
+
+-- | Get number of bytes consumed so far.
+getConsumed :: Parser Int64
+getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s)
+
+-- | Character parser.
+satisfy :: (Word8 -> Bool) -> Parser Word8
+satisfy p =
+    Parser $ \s@(S sb lb n) ->
+           case SB.uncons sb of
+             Just (c, sb') | p c -> Right (c, mkState (sb' +: lb) (n + 1))
+                           | otherwise -> Left (sb +: lb, [])
+             Nothing -> unParser (nextChunk >> satisfy p) s
+{-# INLINE satisfy #-}
+
+-- | Satisfy a literal string.
+string :: LB.ByteString -> Parser LB.ByteString
+string s = Parser $ \(S sb lb n) ->
+           let bs = sb +: lb
+               l = LB.length s
+               (h,t) = LB.splitAt l bs
+           in if s == h
+              then Right (s, mkState t (n + l))
+              else Left (bs, [])
+{-# INLINE string #-}
+
+endOfLine :: Parser ()
+endOfLine = Parser $ \(S sb lb n) ->
+            let bs = sb +: lb
+            in if SB.null sb
+               then Left (bs, ["EOL"])
+               else case I.w2c (U.unsafeHead sb) of
+                     '\n' -> Right ((), mkState (LB.tail bs) (n + 1))
+                     '\r' -> let (h,t) = LB.splitAt 2 bs
+                                 rn = L8.pack "\r\n"
+                             in if h == rn
+                                then Right ((), mkState t (n + 2))
+                                else Right ((), mkState (LB.tail bs) (n + 1))
+                     _ -> Left (bs, ["EOL"])
+
+-- | Satisfy a literal string, after applying a transformation to both
+-- it and the matching text.
+stringTransform :: (LB.ByteString -> LB.ByteString) -> LB.ByteString
+                -> Parser LB.ByteString
+stringTransform f s = Parser $ \(S sb lb n) ->
+             let bs = sb +: lb
+                 l = LB.length s
+                 (h, t) = LB.splitAt l bs
+             in if fs == f h
+                then Right (s, mkState t (n + l))
+                else Left (bs, [])
+    where fs = f s
+{-# INLINE stringTransform #-}
+
+try :: Parser a -> Parser a
+try p = Parser $ \s@(S sb lb _) ->
+        case unParser p s of
+          Left (_, msgs) -> Left (sb +: lb, msgs)
+          ok -> ok
+
+-- | Detect 'end of file'.
+eof :: Parser ()
+eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb
+                                 then Right ((), s)
+                                 else Left (sb +: lb, ["EOF"])
+
+takeAll :: Parser LB.ByteString
+takeAll = Parser $ \(S sb lb n) ->
+          let bs = sb +: lb
+          in Right (bs, mkState LB.empty (n + LB.length bs))
+
+takeCount :: Int -> Parser LB.ByteString
+takeCount k =
+  Parser $ \(S sb lb n) ->
+      let bs = sb +: lb
+          k' = fromIntegral k
+          (h,t) = LB.splitAt k' bs
+      in if LB.length h == k'
+         then Right (h, mkState t (n + k'))
+         else Left (bs, [show k ++ " bytes"])
+
+-- | Consume characters while the predicate is true.
+takeWhile :: (Word8 -> Bool) -> Parser LB.ByteString
+takeWhile p =
+    Parser $ \(S sb lb n) ->
+    let (h,t) = LB.span p (sb +: lb)
+    in Right (h, mkState t (n + LB.length h))
+{-# INLINE takeWhile #-}
+
+takeTill :: (Word8 -> Bool) -> Parser LB.ByteString
+takeTill p =
+  Parser $ \(S sb lb n) ->
+  case LB.break p (sb +: lb) of
+    (h,t) | LB.null t -> Left (h, [])
+          | otherwise -> Right (h, mkState t (n + LB.length h))
+{-# INLINE takeTill #-}
+
+takeWhile1 :: (Word8 -> Bool) -> Parser LB.ByteString
+takeWhile1 p =
+    Parser $ \(S sb lb n) ->
+    case LB.span p (sb +: lb) of
+      (h,t) | LB.null h -> Left (t, [])
+            | otherwise -> Right (h, mkState t (n + LB.length h))
+{-# INLINE takeWhile1 #-}
+
+-- | Test that a parser returned a non-null ByteString.
+notEmpty :: Parser LB.ByteString -> Parser LB.ByteString 
+notEmpty p = Parser $ \s ->
+             case unParser p s of
+               o@(Right (a, _)) ->
+                   if LB.null a
+                   then Left (a, ["notEmpty"])
+                   else o
+               x -> x
+
+-- | Parse some input with the given parser and return that input
+-- without copying it.
+match :: Parser a -> Parser LB.ByteString
+match p = do bs <- getInput
+             start <- getConsumed
+             p
+             end <- getConsumed
+             return (LB.take (end - start) bs)
+
+eitherP :: Parser a -> Parser b -> Parser (Either a b)
+eitherP a b = (Left <$> a) <|> (Right <$> b)
+{-# INLINE eitherP #-}
+
+peek :: Parser a -> Parser (Maybe a)
+peek p = Parser $ \s ->
+         case unParser p s of
+           Right (m, _) -> Right (Just m, s)
+           _ -> Right (Nothing, s)
+
+lookAhead :: Parser a -> Parser a
+lookAhead p = Parser $ \s ->
+         case unParser p s of
+           Right (m, _) -> Right (m, s)
+           Left (e, bs) -> Left (e, bs)
+
+parseAt :: Parser a -> LB.ByteString -> Int64
+        -> (LB.ByteString, Either ParseError (a, Int64))
+parseAt p bs n = 
+    case unParser p (mkState bs n) of
+      Left (bs', msg) -> (bs', Left $ showError msg)
+      Right (a, ~(S sb lb n')) -> (sb +: lb, Right (a, n'))
+    where
+      showError [""] = "Parser error\n"
+      showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n"
+      showError [] = "Parser error\n"
+      showError msgs = "Parser error, expected one of:\n" ++ unlines msgs
+
+-- | Run a parser.
+parse :: Parser a -> LB.ByteString
+      -> (LB.ByteString, Either ParseError a)
+parse p bs = case parseAt p bs 0 of
+               (bs', Right (a, _)) -> (bs', Right a)
+               (bs', Left err) -> (bs', Left err)
+
+parseTest :: (Show a) => Parser a -> LB.ByteString -> IO ()
+parseTest p s =
+    case parse p s of
+      (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st
+      (_, Right r) -> print r
+
+#define PARSER Parser
+#include "Word8Boilerplate.h"

Data/Attoparsec/Word8Boilerplate.h

+-- -*- haskell -*-
+-- This file is intended to be #included by other source files.
+
+instance MonadPlus (PARSER) where
+    mzero = zero
+    mplus = plus
+
+instance Applicative (PARSER) where
+    pure = return
+    (<*>) = ap
+
+instance Alternative (PARSER) where
+    empty = zero
+    (<|>) = plus
+
+-- | Skip over characters while the predicate is true.
+skipWhile :: (Word8 -> Bool) -> PARSER ()
+skipWhile p = takeWhile p *> pure ()
+{-# INLINE skipWhile #-}
+
+anyWord8 :: PARSER Word8
+anyWord8 = satisfy $ const True
+{-# INLINE anyWord8 #-}
+
+-- | Satisfy a specific character.
+word8 :: Word8 -> PARSER Word8
+word8 c = satisfy (== c) <?> show c
+{-# INLINE word8 #-}
+
+-- | Satisfy a specific character.
+notWord8 :: Word8 -> PARSER Word8
+notWord8 c = satisfy (/= c) <?> "not " ++ show c
+{-# INLINE notWord8 #-}
 build-type:      Simple
 description:     Fast, flexible text-oriented parsing of lazy ByteStrings.
 extra-source-files:
-                 src/Data/ParserCombinators/Attoparsec/Char8Boilerplate.h
-                 src/Data/ParserCombinators/Attoparsec/Word8Boilerplate.h
+                 Data/Attoparsec/Char8Boilerplate.h
+                 Data/Attoparsec/Word8Boilerplate.h
 
 flag split-base
 flag applicative-in-base
   build-depends: bytestring-lexing >= 0.2
 
   extensions:      CPP
-  exposed-modules: Data.ParserCombinators.Attoparsec
-                   Data.ParserCombinators.Attoparsec.Char8
-                   Data.ParserCombinators.Attoparsec.Combinator
-                   Data.ParserCombinators.Attoparsec.Incremental
-                   Data.ParserCombinators.Attoparsec.Incremental.Char8
-                   Data.ParserCombinators.Attoparsec.FastSet
-                   Data.ParserCombinators.Attoparsec.Internal
-  hs-source-dirs:  src
+  exposed-modules: Data.Attoparsec
+                   Data.Attoparsec.Char8
+                   Data.Attoparsec.Combinator
+                   Data.Attoparsec.Incremental
+                   Data.Attoparsec.Incremental.Char8
+                   Data.Attoparsec.FastSet
+                   Data.Attoparsec.Internal
   ghc-options:     -O2 -Wall -funbox-strict-fields
                    -fliberate-case-threshold=1000

src/Data/ParserCombinators/Attoparsec.hs

------------------------------------------------------------------------------
--- |
--- Module      :  Data.ParserCombinators.Attoparsec
--- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
--- 
------------------------------------------------------------------------------
-module Data.ParserCombinators.Attoparsec
-    (
-    -- * Parser
-      ParseError
-    , Parser
-
-    -- * Running parsers
-    , parse
-    , parseAt
-    , parseTest
-
-    -- * Combinators
-    , (<?>)
-
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , try
-    , eof
-    , lookAhead
-    , peek
-
-    -- * Things like in @Parsec.Char@
-    , satisfy
-    , anyWord8
-    , word8
-    , notWord8
-    , string
-    , stringTransform
-
-    -- * Parser converters.
-    , eitherP
-
-    -- * Miscellaneous functions.
-    , getInput
-    , getConsumed
-    , takeWhile
-    , takeWhile1
-    , takeTill
-    , takeAll
-    , skipWhile
-    , notEmpty
-    , match
-    ) where
-
-import Data.ParserCombinators.Attoparsec.Internal
-import Prelude hiding (takeWhile)

src/Data/ParserCombinators/Attoparsec/Char8.hs

-{-# LANGUAGE CPP #-}
------------------------------------------------------------------------------
--- |
--- Module      :  Data.ParserCombinators.Attoparsec.Char8
--- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
--- 
------------------------------------------------------------------------------
-module Data.ParserCombinators.Attoparsec.Char8
-    (
-    -- * Parser
-      ParseError
-    , Parser
-
-    -- * Running parsers
-    , parse
-    , parseAt
-    , parseTest
-
-    -- * Combinators
-    , (<?>)
-
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , try
-    , eof
-    , lookAhead
-    , peek
-
-    -- * Things like in @Parsec.Char@
-    , satisfy
-    , letter
-    , digit
-    , anyChar
-    , space
-    , char
-    , notChar
-    , string
-    , stringCI
-
-    -- * Parser converters.
-    , eitherP
-
-    -- * Numeric parsers.
-    , int
-    , integer
-    , double
-
-    -- * Miscellaneous functions.
-    , getInput
-    , getConsumed
-    , takeWhile
-    , takeWhile1
-    , takeTill
-    , takeAll
-    , takeCount
-    , skipWhile
-    , skipSpace
-    , notEmpty
-    , match
-    , inClass
-    , notInClass
-    , endOfLine
-    ) where
-
-import Control.Applicative ((<$>))
-import qualified Data.ByteString.Char8 as SB
-import qualified Data.ByteString.Lazy.Char8 as LB
-import Data.ByteString.Internal (w2c)
-import Data.Char (isDigit, isLetter, isSpace, toLower)
-import Data.ParserCombinators.Attoparsec.FastSet
-    (FastSet, memberChar, set)
-import qualified Data.ParserCombinators.Attoparsec.Internal as I
-import Data.ParserCombinators.Attoparsec.Internal
-    (Parser, ParseError, (<?>), parse, parseAt, parseTest, try, eof,
-     lookAhead, peek, string,
-     eitherP, getInput, getConsumed, takeAll, takeCount, notEmpty, match,
-     endOfLine, setInput)
-import Data.ByteString.Lex.Lazy.Double (readDouble)
-import Prelude hiding (takeWhile)
-
--- | Satisfy a literal string, ignoring case.
-stringCI :: LB.ByteString -> Parser LB.ByteString
-stringCI = I.stringTransform (LB.map toLower)
-{-# INLINE stringCI #-}
-
-takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString
-takeWhile1 p = I.takeWhile1 (p . w2c)
-{-# INLINE takeWhile1 #-}
-
-numeric :: String -> (LB.ByteString -> Maybe (a,LB.ByteString)) -> Parser a
-numeric desc f = do
-  s <- getInput
-  case f s of
-    Nothing -> fail desc
-    Just (i,s') -> setInput s' >> return i
-                   
--- | Parse an integer.  The position counter is not updated.
-int :: Parser Int
-int = numeric "Int" LB.readInt
-
--- | Parse an integer.  The position counter is not updated.
-integer :: Parser Integer
-integer = numeric "Integer" LB.readInteger
-
--- | Parse a Double.  The position counter is not updated.
-double :: Parser Double
-double = numeric "Double" readDouble
-
-#define PARSER Parser
-#include "Char8Boilerplate.h"

src/Data/ParserCombinators/Attoparsec/Char8Boilerplate.h

--- -*- haskell -*-
-
--- | Character parser.
-satisfy :: (Char -> Bool) -> PARSER Char
-satisfy p = w2c <$> I.satisfy (p . w2c)
-{-# INLINE satisfy #-}
-
-letter :: PARSER Char
-letter = satisfy isLetter <?> "letter"
-{-# INLINE letter #-}
-
-digit :: PARSER Char
-digit = satisfy isDigit <?> "digit"
-{-# INLINE digit #-}
-
-anyChar :: PARSER Char
-anyChar = satisfy $ const True
-{-# INLINE anyChar #-}
-
-space :: PARSER Char
-space = satisfy isSpace <?> "space"
-{-# INLINE space #-}
-
--- | Satisfy a specific character.
-char :: Char -> PARSER Char
-char c = satisfy (== c) <?> [c]
-{-# INLINE char #-}
-
--- | Satisfy a specific character.
-notChar :: Char -> PARSER Char
-notChar c = satisfy (/= c) <?> "not " ++ [c]
-{-# INLINE notChar #-}
-
-charClass :: String -> FastSet
-charClass = set . SB.pack . go
-    where go (a:'-':b:xs) = [a..b] ++ go xs
-          go (x:xs) = x : go xs
-          go _ = ""
-
-inClass :: String -> Char -> Bool
-inClass s = (`memberChar` myset)
-    where myset = charClass s
-{-# INLINE inClass #-}
-
-notInClass :: String -> Char -> Bool
-notInClass s = not . inClass s
-{-# INLINE notInClass #-}
-
--- | Consume characters while the predicate is true.
-takeWhile :: (Char -> Bool) -> PARSER LB.ByteString
-takeWhile p = I.takeWhile (p . w2c)
-{-# INLINE takeWhile #-}
-
-takeTill :: (Char -> Bool) -> PARSER LB.ByteString
-takeTill p = I.takeTill (p . w2c)
-{-# INLINE takeTill #-}
-
--- | Skip over characters while the predicate is true.
-skipWhile :: (Char -> Bool) -> PARSER ()
-skipWhile p = I.skipWhile (p . w2c)
-{-# INLINE skipWhile #-}
-
--- | Skip over white space.
-skipSpace :: PARSER ()
-skipSpace = takeWhile isSpace >> return ()
-{-# INLINE skipSpace #-}

src/Data/ParserCombinators/Attoparsec/Combinator.hs

-{-# LANGUAGE BangPatterns, CPP #-}
------------------------------------------------------------------------------
--- |
--- Module      :  Data.ParserCombinators.Attoparsec.Combinator
--- Copyright   :  Bryan O'Sullivan 2009
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Useful parser combinators, similar to Parsec.
--- 
------------------------------------------------------------------------------
-module Data.ParserCombinators.Attoparsec.Combinator
-    (
-      choice
-    , count
-    , many
-    , many1
-    , manyTill
-    , sepBy
-    , sepBy1
-    , skipMany
-    , skipMany1
-    ) where
-
-import Control.Applicative
-
-choice :: Alternative f => [f a] -> f a
-choice = foldr (<|>) empty
-
-many1 :: Alternative f => f a -> f [a]
-many1 p = liftA2 (:) p (many p)
-
-sepBy :: Alternative f => f a -> f s -> f [a]
-sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure []
-
-sepBy1 :: Alternative f => f a -> f s -> f [a]
-sepBy1 p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure [])
-
-manyTill :: Alternative f => f a -> f b -> f [a]
-manyTill p end = scan
-    where scan = (end *> pure []) <|> liftA2 (:) p scan
-
--- | Skip zero or more instances of the parser.
-skipMany :: Alternative f => f a -> f ()
-skipMany p = scan
-    where scan = (p *> scan) <|> pure ()
-
--- | Skip one or more instances of the parser.
-skipMany1 :: Alternative f => f a -> f ()
-skipMany1 p = p *> skipMany p
-
--- | Apply the given parser repeatedly, returning every parse result.
-count :: Monad m => Int -> m a -> m [a]
-count n p = sequence (replicate n p)
-{-# INLINE count #-}

src/Data/ParserCombinators/Attoparsec/FastSet.hs

-{-# LANGUAGE BangPatterns #-}
-
------------------------------------------------------------------------------
--- |
--- Module      :  Data.ParserCombinators.Attoparsec.FastSet
--- Copyright   :  Bryan O'Sullivan 2008
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The
--- set representation is unboxed for efficiency.  For sets of fewer
--- than 32 elements, we test for membership using a binary search.
--- For larger sets, we use a lookup table.
--- 
------------------------------------------------------------------------------
-module Data.ParserCombinators.Attoparsec.FastSet
-    (
-    -- * Data type
-      FastSet
-    -- * Construction
-    , fromList
-    , set
-    -- * Lookup
-    , memberChar
-    , memberWord8
-    -- * Debugging
-    , fromSet
-    ) where
-
-import Data.Bits ((.&.), (.|.), shiftL, shiftR)
-import qualified Data.ByteString.Char8 as B8
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Internal as I
-import qualified Data.ByteString.Unsafe as U
-import Data.Word (Word8)
-import Foreign.Storable (peekByteOff, pokeByteOff)
-
-data FastSet = Sorted { fromSet :: {-# UNPACK #-} !B.ByteString }
-             | Table  { fromSet :: {-# UNPACK #-} !B.ByteString }
-    deriving (Eq, Ord)
-
-instance Show FastSet where
-    show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s)
-    show (Table _) = "FastSet Table"
-
--- | The lower bound on the size of a lookup table.  We choose this to
--- balance table density against performance.
-tableCutoff :: Int
-tableCutoff = 8
-
--- | Create a set.
-set :: B.ByteString -> FastSet
-set s | B.length s < tableCutoff = Sorted . B.sort $ s
-      | otherwise                = Table . mkTable $ s
-
-fromList :: [Word8] -> FastSet
-fromList = set . B.pack
-
-index :: Int -> (Int, Word8)
-index i = (i `shiftR` 3, 1 `shiftL` (i .&. 7))
-
--- | Check the set for membership.
-memberWord8 :: Word8 -> FastSet -> Bool
-memberWord8 w (Table t)  =
-    let (byte,bit) = index (fromIntegral w)
-    in U.unsafeIndex t byte .&. bit /= 0
-memberWord8 w (Sorted s) = search 0 (B.length s - 1)
-    where search lo hi
-              | hi < lo = False
-              | otherwise =
-                  let mid = (lo + hi) `div` 2
-                  in case compare w (U.unsafeIndex s mid) of
-                       GT -> search (mid + 1) hi
-                       LT -> search lo (mid - 1)
-                       _ -> True
-
--- | Check the set for membership.  Only works with 8-bit characters:
--- characters above code point 255 will give wrong answers.
-memberChar :: Char -> FastSet -> Bool
-memberChar c = memberWord8 (I.c2w c)
-
-mkTable :: B.ByteString -> B.ByteString
-mkTable s = I.unsafeCreate 32 $ \t -> do
-            I.memset t 0 32
-            U.unsafeUseAsCStringLen s $ \(p, l) ->
-              let loop n | n == l = return ()
-                         | otherwise = do
-                    c <- peekByteOff p n :: IO Word8
-                    let (byte,bit) = index (fromIntegral c)
-                    prev <- peekByteOff t byte :: IO Word8
-                    pokeByteOff t byte (prev .|. bit)
-                    loop (n + 1)
-              in loop 0

src/Data/ParserCombinators/Attoparsec/Incremental.hs

-{-# LANGUAGE BangPatterns, CPP #-}
------------------------------------------------------------------------------
--- |
--- Module      :  Data.ParserCombinators.Attoparsec.Incremental
--- Copyright   :  Bryan O'Sullivan 2009
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
---
--- This module is heavily influenced by Adam Langley's incremental
--- parser in his binary-strict package.
--- 
------------------------------------------------------------------------------
-module Data.ParserCombinators.Attoparsec.Incremental
-    (
-      Parser
-    , Result(..)
-    , parse
-    , parseWith
-    , parseTest
-
-    , (<?>)
-    , takeWhile
-    , takeTill
-    , takeCount
-    , string
-    , satisfy
-    , pushBack
-
-    , word8
-    , notWord8
-    , anyWord8
-
-    , skipWhile
-
-    , yield
-    ) where
-
-import Control.Applicative
-import Control.Monad (MonadPlus(..), ap)
-import Data.ParserCombinators.Attoparsec.Internal ((+:))
-import Data.Word (Word8)
-import qualified Data.ByteString as S
-import qualified Data.ByteString.Lazy as L
-import qualified Data.ByteString.Lazy.Internal as L
-import Prelude hiding (takeWhile)
-
-data S = S {-# UNPACK #-} !S.ByteString -- ^ first chunk of input
-           L.ByteString                 -- ^ rest of input
-           [L.ByteString]               -- ^ input acquired during backtracks
-           !Bool                        -- ^ have we hit EOF yet?
-           {-# UNPACK #-} !Int          -- ^ failure depth
-
--- | The result of a partial parse
-data Result a = Failed String
-                -- ^ the parse failed with the given error message
-              | Done L.ByteString a
-                -- ^ the parse finished and produced the given list of
-                --   results doing so. Any unparsed data is returned.
-              | Partial (L.ByteString -> Result a)
-                -- ^ the parse ran out of data before finishing, but produced
-                --   the given list of results before doing so. To continue the
-                --   parse pass more data to the given continuation
-
-instance (Show a) => Show (Result a) where
-  show (Failed err)      = "Failed " ++ show err
-  show (Done L.Empty rs) = "Done Empty " ++ show rs
-  show (Done rest rs)    = "Done (" ++ show rest ++ ") " ++ show rs
-  show (Partial _)       = "Partial"
-
--- | This is the internal version of the above. This is the type which is
---   actually used by the code, as it has the extra information needed
---   for backtracking. This is converted to an external friendly @Result@
---   type just before giving it to the outside world.
-data IResult a = IFailed S String
-               | IDone S a
-               | IPartial (L.ByteString -> IResult a)
-
-instance Show (IResult a) where
-  show (IFailed _ err) = "IFailed " ++ show err
-  show (IDone _ _)     = "IDone"
-  show (IPartial _)    = "IPartial"
-
-newtype Parser r a = Parser {
-      unParser :: S -> (a -> S -> IResult r) -> IResult r
-    }
-
-instance Monad (Parser r) where
-  return a = Parser $ \s k -> k a s
-  m >>= k = Parser $ \s cont -> unParser m s $ \a s' -> unParser (k a) s' cont
-  fail err = Parser $ \s -> const $ IFailed s err
-
-zero :: Parser r a
-zero = fail ""
-
--- | I'm not sure if this is a huge bodge or not. It probably is.
---
--- When performing a choice (in @plus@), the failure depth in the
--- current state is incremented. If a failure is generated inside the
--- attempted path, the state carried in the IFailure will have this
--- incremented failure depth. However, we don't want to backtrack
--- after the attempted path has completed. Thus we insert this cut
--- continuation, which decrements the failure count of any failure
--- passing though, thus it would be caught in @plus@ and doesn't
--- trigger a backtrack.
-cutContinuation :: (a -> S -> IResult r) -> a -> S -> IResult r
-cutContinuation k v s =
-  case k v s of
-       IFailed (S lb i adds eof failDepth) err -> IFailed (S lb i adds eof (failDepth - 1)) err
-       x -> x
-
-appL :: L.ByteString -> L.ByteString -> L.ByteString
-appL xs L.Empty = xs
-appL L.Empty ys = ys
-appL xs ys      = xs `L.append` ys
-
-plus :: Parser r a -> Parser r a -> Parser r a
-plus p1 p2 =
-  Parser $ \(S sb lb adds eof failDepth) k ->
-    let
-      filt f@(IFailed (S _ _ adds' eof' failDepth') _)
-        | failDepth' == failDepth + 1 =
-            let lb' = lb `appL` L.concat (reverse adds')
-            in  unParser p2 (S sb lb' (adds' ++ adds) eof' failDepth) k
-        | otherwise = f
-      filt (IPartial cont) = IPartial (filt . cont)
-      filt v@(IDone _ _) = v
-    in
-      filt $ unParser p1 (S sb lb [] eof (failDepth + 1)) (cutContinuation k)
-
-instance Functor (Parser r) where
-    fmap f m = Parser $ \s cont -> unParser m s (cont . f)
-
-infix 0 <?>
-
--- | Name the parser.
-(<?>) :: Parser r a -> String -> Parser r a
-{-# INLINE (<?>) #-}
-p <?> msg =
-  Parser $ \st k ->
-    case unParser p st k of
-      IFailed st' _ -> IFailed st' msg
-      ok -> ok
-
-initState :: L.ByteString -> S
-initState (L.Chunk sb lb) = S sb lb [] False 0
-initState _               = S S.empty L.empty [] False 0
-
-mkState :: L.ByteString -> [L.ByteString] -> Bool -> Int -> S
-mkState bs adds eof failDepth =
-    case bs of
-      L.Empty -> S S.empty L.empty adds eof failDepth
-      L.Chunk sb lb -> S sb lb adds eof failDepth
-
-addX :: L.ByteString -> [L.ByteString] -> [L.ByteString]
-addX s adds | L.null s = adds
-            | otherwise = s : adds
-
-yield :: Parser r ()
-yield = Parser $ \(S sb lb adds eof failDepth) k ->
-  IPartial $ \s -> k () (S sb (lb `appL` s) (addX s adds) eof failDepth)
-
-continue :: (S -> IResult r) -> Parser r a
-         -> (a -> S -> IResult r) -> S -> IResult r
-continue onEOF p k (S _sb _lb adds eof failDepth) =
-    if eof
-    then onEOF (S S.empty L.empty adds True failDepth)
-    else IPartial $ \s -> let st = contState s adds failDepth
-                          in unParser p st k
-
-takeWith :: (L.ByteString -> (L.ByteString, L.ByteString))
-         -> Parser r L.ByteString
-takeWith splitf =
-  Parser $ \st@(S sb lb adds eof failDepth) k ->
-  let (left,rest) = splitf (sb +: lb)
-  in if L.null rest
-     then continue (k left) (takeWith splitf) (k . appL left) st
-     else k left (mkState rest adds eof failDepth)
-    
-takeWhile :: (Word8 -> Bool) -> Parser r L.ByteString
-takeWhile = takeWith . L.span
-
-takeTill :: (Word8 -> Bool) -> Parser r L.ByteString
-takeTill = takeWith . L.break
-
-takeCount :: Int -> Parser r L.ByteString
-takeCount = tc . fromIntegral where
- tc n = Parser $ \st@(S sb lb adds eof failDepth) k ->
-        let (h,t) = L.splitAt n (sb +: lb)
-            l = L.length h
-        in if L.length h == n
-           then k h (mkState t adds eof failDepth)
-           else continue (`IFailed` "takeCount: EOF")
-                         (tc (n - l)) (k . appL h) st
-
-string :: L.ByteString -> Parser r L.ByteString
-string s =
-  Parser $ \st@(S sb lb adds eof failDepth) k ->
-    case L.splitAt (L.length s) (sb +: lb) of
-      (h,t)
-        | h == s -> k s (mkState t adds eof failDepth)
-      (h,L.Empty)
-        | h `L.isPrefixOf` s ->
-            continue (`IFailed` "string: EOF")
-                     (string (L.drop (L.length h) s))
-                     (k . appL h)
-                     st
-      _ -> IFailed st "string failed to match"
-
-contState :: L.ByteString -> [L.ByteString] -> Int -> S
-contState s adds failDepth
-    | L.null s  = S S.empty L.empty [] True failDepth
-    | otherwise = mkState s (addX s adds) False failDepth
-
-satisfy :: (Word8 -> Bool) -> Parser r Word8
-satisfy p =
-  Parser $ \st@(S sb lb adds eof failDepth) k ->
-    case S.uncons sb of
-      Just (w, sb') | p w -> k w (S sb' lb adds eof failDepth)
-                    | otherwise -> IFailed st "failed to match"
-      Nothing -> case L.uncons lb of
-                   Just (w, lb') | p w -> k w (mkState lb' adds eof failDepth)
-                                 | otherwise -> IFailed st "failed to match"
-                   Nothing -> continue (`IFailed` "satisfy: EOF")
-                                       (satisfy p) k st
-
-pushBack :: L.ByteString -> Parser r ()
-pushBack bs =
-    Parser $ \(S sb lb adds eof failDepth) k ->
-        k () (mkState (bs `appL` (sb +: lb)) adds eof failDepth)
-
-toplevelTranslate :: IResult a -> Result a
-toplevelTranslate (IFailed _ err) = Failed err
-toplevelTranslate (IDone (S sb lb _ _ _) value) = Done (sb +: lb) value
-toplevelTranslate (IPartial k) = Partial $ toplevelTranslate . k
-
-terminalContinuation :: a -> S -> IResult a
-terminalContinuation v s = IDone s v
-
-parse :: Parser r r -> L.ByteString -> Result r
-parse m input =
-  toplevelTranslate $ unParser m (initState input) terminalContinuation
-
-parseWith :: Applicative f => f L.ByteString -> Parser r r -> L.ByteString
-          -> f (Result r)
-parseWith refill p s =
-  case parse p s of
-    Partial k -> k <$> refill
-    ok        -> pure ok
-
-parseTest :: (Show r) => Parser r r -> L.ByteString -> IO ()
-parseTest p s = print (parse p s)
-
-#define PARSER Parser r
-#include "Word8Boilerplate.h"

src/Data/ParserCombinators/Attoparsec/Incremental/Char8.hs

-{-# LANGUAGE CPP #-}
------------------------------------------------------------------------------
--- |
--- Module      :  Data.ParserCombinators.Attoparsec.Incremental.Char8
--- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
--- 
------------------------------------------------------------------------------
-module Data.ParserCombinators.Attoparsec.Incremental.Char8
-    (
-    -- * Parser
-      Parser
-    , Result(..)
-
-    -- * Running parsers
-    , parse
-
-    -- * Combinators
-    , (<?>)
-
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , pushBack
-
-    -- * Things like in @Parsec.Char@
-    , satisfy
-    , letter
-    , digit
-    , anyChar
-    , space
-    , char
-    , notChar
-    , string
-
-    -- * Numeric parsers.
-    , int
-    , integer
-    , double
-
-    -- * Miscellaneous functions.
-    , takeWhile
-    , takeTill
-    , takeCount
-    , skipWhile
-    , skipSpace
-    , inClass
-    , notInClass
-    ) where
-
-import Control.Applicative ((<$>))
-import qualified Data.ByteString.Char8 as SB
-import qualified Data.ByteString.Lazy.Char8 as LB
-import Data.ByteString.Internal (w2c)
-import Data.Char (isDigit, isLetter, isSpace)
-import Data.ParserCombinators.Attoparsec.FastSet
-    (FastSet, memberChar, set)
-import qualified Data.ParserCombinators.Attoparsec.Incremental as I
-import Data.ParserCombinators.Attoparsec.Incremental
-    (Parser, Result(..), (<?>), parse, pushBack,
-     string, takeCount)
-import Data.ByteString.Lex.Lazy.Double (readDouble)
-import Prelude hiding (takeWhile)
-
-numeric :: String -> (Char -> Bool)
-         -> (LB.ByteString -> Maybe (a,LB.ByteString)) -> Parser r a
-numeric desc p f = do
-  s <- takeWhile p
-  case f s of
-    Nothing -> pushBack s >> fail desc
-    Just (i,s') -> pushBack s' >> return i
-                   
-isIntegral :: Char -> Bool
-isIntegral c = isDigit c || c == '-'
-
--- | Parse an integer.  The position counter is not updated.
-int :: Parser r Int
-int = numeric "Int" isIntegral LB.readInt
-
--- | Parse an integer.  The position counter is not updated.
-integer :: Parser r Integer
-integer = numeric "Integer" isIntegral LB.readInteger
-
--- | Parse a Double.  The position counter is not updated.
-double :: Parser r Double
-double = numeric "Double" isDouble readDouble
-    where isDouble c = isIntegral c || c == 'e' || c == '+'
-
-#define PARSER Parser r
-#include "../Char8Boilerplate.h"

src/Data/ParserCombinators/Attoparsec/Internal.hs

-{-# LANGUAGE CPP #-}
------------------------------------------------------------------------------
--- |
--- Module      :  Data.ParserCombinators.Attoparsec.Internal
--- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
--- 
------------------------------------------------------------------------------
-module Data.ParserCombinators.Attoparsec.Internal
-    (
-    -- * Parser
-      ParseError
-    , Parser
-
-    -- * Running parsers
-    , parse
-    , parseAt
-    , parseTest
-
-    -- * Combinators
-    , (<?>)
-
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , try
-    , eof
-    , lookAhead
-    , peek
-
-    -- * Things like in @Parsec.Char@
-    , satisfy
-    , anyWord8
-    , word8
-    , notWord8
-    , string
-    , stringTransform
-
-    -- * Parser converters.
-    , eitherP
-
-    -- * Miscellaneous functions.
-    , getInput
-    , getConsumed
-    , setInput
-    , takeWhile
-    , takeWhile1
-    , takeTill
-    , takeAll
-    , takeCount
-    , skipWhile
-    , notEmpty
-    , match
-    , endOfLine
-
-    -- * Utilities.
-    , (+:)
-    ) where
-
-import Control.Applicative
-import Control.Monad (MonadPlus(..), ap)
-import Control.Monad.Fix (MonadFix(..))
-import qualified Data.ByteString as SB
-import qualified Data.ByteString.Lazy as LB
-import qualified Data.ByteString.Lazy.Char8 as L8
-import qualified Data.ByteString.Unsafe as U
-import qualified Data.ByteString.Internal as I
-import qualified Data.ByteString.Lazy.Internal as LB
-import Data.Int (Int64)
-import Data.Word (Word8)
-import Prelude hiding (takeWhile)
-
-type ParseError = String
-
--- State invariants:
--- * If both strict and lazy bytestrings are empty, the entire input
---   is considered to be empty.
-data S = S {-# UNPACK #-} !SB.ByteString
-           LB.ByteString
-           {-# UNPACK #-} !Int64
-
-newtype Parser a = Parser {
-      unParser :: S -> Either (LB.ByteString, [String]) (a, S)
-    }
-
-instance Functor Parser where
-    fmap f p =
-        Parser $ \s ->
-            case unParser p s of
-              Right (a, s') -> Right (f a, s')
-              Left err -> Left err
-
-instance Monad Parser where
-    return a = Parser $ \s -> Right (a, s)
-    m >>= f = Parser $ \s ->
-              case unParser m s of
-                Right (a, s') -> unParser (f a) s'
-                Left (s', msgs) -> Left (s', msgs)
-    fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err])
-
-instance MonadFix Parser where
-    mfix f = Parser $ \s ->
-             let r = case r of
-                       Right (a, _) -> unParser (f a) s
-                       err -> err
-             in r
-
-zero :: Parser a
-zero = Parser $ \(S sb lb _) -> Left (sb +: lb, [])
-{-# INLINE zero #-}
-
-plus :: Parser a -> Parser a -> Parser a
-plus p1 p2 =
-    Parser $ \s@(S sb lb _) ->
-        case unParser p1 s of
-          Left (_, msgs1) -> 
-              case unParser p2 s of