Commits

Mario Blažević  committed de1b47a Merge

Merging in the parent attoparsec changes.

  • Participants
  • Parent commits 473baf0, 4907181

Comments (0)

Files changed (15)

 f0bc0f79b70b97a856042386aa8856ac1cad88f1 0.10.2.0
 030191aa39c97afcad17d365ed114a2eedc04ad4 0.10.3.0
 ec23283e88c00af34c3144a8775ca4358daea3f7 0.10.4.0
+6f4b317a12acdd05d67fd9ec55d3defaf37a01eb 0.11.1.0
+48809d7716468fe2bbc47b191b1d64358daaf6d1 0.11.2.1
+856a772f83a6e382bbcfe53e221a5a92623b107c 0.11.3.0

File Data/Attoparsec/ByteString.hs

     , eitherResult
 
     -- * Combinators
-    , (I.<?>)
-    , I.try
     , module Data.Attoparsec.Combinator
 
     -- * Parsing individual bytes
     , I.word8
     , I.anyWord8
     , I.notWord8
-    , I.peekWord8
     , I.satisfy
     , I.satisfyWith
     , I.skip
 
+    -- ** Lookahead
+    , I.peekWord8
+    , I.peekWord8'
+
     -- ** Byte classes
     , I.inClass
     , I.notInClass
     -- ** Consume all remaining input
     , I.takeByteString
     , I.takeLazyByteString
-
-    -- * State observation and manipulation functions
-    , I.endOfInput
-    , I.atEnd
     ) where
 
 import Data.Attoparsec.Combinator

File Data/Attoparsec/ByteString/Char8.hs

     , A.eitherResult
 
     -- * Combinators
-    , (I.<?>)
-    , I.try
     , module Data.Attoparsec.Combinator
 
     -- * Parsing individual characters
     , char8
     , anyChar
     , notChar
+    , satisfy
+
+    -- ** Lookahead
     , peekChar
-    , satisfy
+    , peekChar'
 
     -- ** Special character parsers
     , digit
     , Number(..)
     , number
     , rational
-
-    -- * State observation and manipulation functions
-    , I.endOfInput
-    , I.atEnd
+    , scientific
     ) where
 
 import Control.Applicative (pure, (*>), (<*), (<$>), (<|>))
 import Data.Attoparsec.ByteString.FastSet (charClass, memberChar)
-import Data.Attoparsec.ByteString.Internal (Parser, (<?>))
+import Data.Attoparsec.ByteString.Internal (Parser)
 import Data.Attoparsec.Combinator
 import Data.Attoparsec.Number (Number(..))
 import Data.Bits (Bits, (.|.), shiftL)
 import Data.ByteString.Internal (c2w, w2c)
 import Data.Int (Int8, Int16, Int32, Int64)
 import Data.String (IsString(..))
-import Data.Scientific (Scientific, scientific, coefficient, base10Exponent)
+import Data.Scientific (Scientific, coefficient, base10Exponent)
+import qualified Data.Scientific as Sci (scientific)
 import Data.Word (Word8, Word16, Word32, Word64, Word)
 import Prelude hiding (takeWhile)
 import qualified Data.Attoparsec.ByteString as A
 anyChar = satisfy $ const True
 {-# INLINE anyChar #-}
 
--- | Match any character. Returns 'Nothing' if end of input has been
--- reached. Does not consume any input.
+-- | Match any character, to perform lookahead. Returns 'Nothing' if
+-- end of input has been reached. Does not consume any input.
 --
 -- /Note/: Because this parser does not fail, do not use it with
 -- combinators such as 'many', because such parsers loop until a
 peekChar = (fmap w2c) `fmap` I.peekWord8
 {-# INLINE peekChar #-}
 
+-- | Match any character, to perform lookahead.  Does not consume any
+-- input, but will fail if end of input has been reached.
+peekChar' :: Parser Char
+peekChar' = w2c `fmap` I.peekWord8'
+{-# INLINE peekChar' #-}
+
 -- | Fast predicate for matching ASCII space characters.
 --
 -- /Note/: This predicate only gives correct answers for the ASCII
                then I (c * 10 ^ e)
                else D (fromInteger c / 10 ^ negate e)
 
+-- | Parse a scientific number.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+scientific :: Parser Scientific
+scientific = scientifically id
+
 {-# INLINE scientifically #-}
 scientifically :: (Scientific -> a) -> Parser a
 scientifically h = do
 
   n <- decimal
 
-  let f fracDigits = scientific (B8.foldl' step n fracDigits)
-                                (negate $ B8.length fracDigits)
+  let f fracDigits = Sci.scientific (B8.foldl' step n fracDigits)
+                                    (negate $ B8.length fracDigits)
       step a w = a * 10 + fromIntegral (w - 48)
 
   s <- let dot = 46 in
        (I.satisfy (==dot) *> (f <$> I.takeWhile isDigit_w8)) <|>
-         pure (scientific n 0)
+         pure (Sci.scientific n 0)
 
   let !signedCoeff | positive  =          coefficient s
                    | otherwise = negate $ coefficient s
   let littleE = 101
       bigE    = 69
   (I.satisfy (\c -> c == littleE || c == bigE) *>
-      fmap (h . scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
-    return (h $ scientific signedCoeff   (base10Exponent s))
+      fmap (h . Sci.scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
+    return (h $ Sci.scientific signedCoeff   (base10Exponent s))

File Data/Attoparsec/ByteString/Internal.hs

-{-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings,
+{-# LANGUAGE BangPatterns, Rank2Types, OverloadedStrings,
     RecordWildCards, MagicHash, UnboxedTuples #-}
 -- |
 -- Module      :  Data.Attoparsec.ByteString.Internal
     , parseOnly
 
     -- * Combinators
-    , (<?>)
-    , try
     , module Data.Attoparsec.Combinator
 
     -- * Parsing individual bytes
     , skip
     , word8
     , notWord8
+
+    -- ** Lookahead
     , peekWord8
+    , peekWord8'
 
     -- ** Byte classes
     , inClass
     , stringTransform
     , take
     , scan
+    , runScanner
     , takeWhile
     , takeWhile1
     , takeTill
     , takeByteString
     , takeLazyByteString
 
-    -- * State observation and manipulation functions
-    , endOfInput
-    , atEnd
-
     -- * Utilities
     , endOfLine
     ) where
 import Data.Attoparsec.Combinator
 import Data.Attoparsec.Internal.Types
     hiding (Parser, Input, Added, Failure, Success)
+import Data.Attoparsec.Internal
 import Data.Monoid (Monoid(..))
 import Data.Word (Word8)
 import Foreign.ForeignPtr (withForeignPtr)
 import qualified Data.ByteString.Lazy as L
 import qualified Data.ByteString.Unsafe as B
 
-#if defined(__GLASGOW_HASKELL__)
 import GHC.Base (realWorld#)
 import GHC.IO (IO(IO))
-#else
-import System.IO.Unsafe (unsafePerformIO)
-#endif
 
 type Parser = T.Parser B.ByteString
 type Result = IResult B.ByteString
-type Input = T.Input B.ByteString
-type Added = T.Added B.ByteString
 type Failure r = T.Failure B.ByteString r
 type Success a r = T.Success B.ByteString a r
 
-ensure' :: Int -> Input -> Added -> More -> Failure r -> Success B.ByteString r
-        -> IResult B.ByteString r
-ensure' !n0 i0 a0 m0 kf0 ks0 =
-    T.runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0
-  where
-    go !n = T.Parser $ \i a m kf ks ->
-        if B.length (unI i) >= n
-        then ks i a m (unI i)
-        else T.runParser (demandInput >> go n) i a m kf ks
-
--- | If at least @n@ bytes of input are available, return the current
--- input, otherwise fail.
-ensure :: Int -> Parser B.ByteString
-ensure !n = T.Parser $ \i0 a0 m0 kf ks ->
-    if B.length (unI i0) >= n
-    then ks i0 a0 m0 (unI i0)
-    -- The uncommon case is kept out-of-line to reduce code size:
-    else ensure' n i0 a0 m0 kf ks
--- Non-recursive so the bounds check can be inlined:
-{-# INLINE ensure #-}
-
--- | Ask for input.  If we receive any, pass it to a success
--- continuation, otherwise to a failure continuation.
-prompt :: Input -> Added -> More
-       -> (Input -> Added -> More -> Result r)
-       -> (Input -> Added -> More -> Result r)
-       -> Result r
-prompt i0 a0 _m0 kf ks = Partial $ \s ->
-    if B.null s
-    then kf i0 a0 Complete
-    else ks (i0 <> I s) (a0 <> A s) Incomplete
-
--- | Immediately demand more input via a 'Partial' continuation
--- result.
-demandInput :: Parser ()
-demandInput = T.Parser $ \i0 a0 m0 kf ks ->
-    if m0 == Complete
-    then kf i0 a0 m0 ["demandInput"] "not enough bytes"
-    else let kf' i a m = kf i a m ["demandInput"] "not enough bytes"
-             ks' i a m = ks i a m ()
-         in prompt i0 a0 m0 kf' ks'
-
--- | This parser always succeeds.  It returns 'True' if any input is
--- available either immediately or on demand, and 'False' if the end
--- of all input has been reached.
-wantInput :: Parser Bool
-wantInput = T.Parser $ \i0 a0 m0 _kf ks ->
-  case () of
-    _ | not (B.null (unI i0)) -> ks i0 a0 m0 True
-      | m0 == Complete  -> ks i0 a0 m0 False
-      | otherwise       -> let kf' i a m = ks i a m False
-                               ks' i a m = ks i a m True
-                           in prompt i0 a0 m0 kf' ks'
-
-get :: Parser B.ByteString
-get  = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)
-
-put :: B.ByteString -> Parser ()
-put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()
-
--- | Attempt a parse, and if it fails, rewind the input so that no
--- input appears to have been consumed.
---
--- This combinator is provided for compatibility with Parsec.
--- Attoparsec parsers always backtrack on failure.
-try :: Parser a -> Parser a
-try p = p
-{-# INLINE try #-}
-
 -- | The parser @satisfy p@ succeeds for any byte for which the
 -- predicate @p@ returns 'True'. Returns the byte that is actually
 -- parsed.
 -- >digit = satisfy isDigit
 -- >    where isDigit w = w >= 48 && w <= 57
 satisfy :: (Word8 -> Bool) -> Parser Word8
-satisfy p = do
-  s <- ensure 1
-  let !w = B.unsafeHead s
-  if p w
-    then put (B.unsafeTail s) >> return w
-    else fail "satisfy"
+satisfy = satisfyElem
 {-# INLINE satisfy #-}
 
 -- | The parser @skip p@ succeeds for any byte for which the predicate
 
 data T s = T {-# UNPACK #-} !Int s
 
--- | A stateful scanner.  The predicate consumes and transforms a
--- state argument, and each transformed state is passed to successive
--- invocations of the predicate on each byte of the input until one
--- returns 'Nothing' or the input ends.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'Nothing' on the first byte of input.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-scan :: s -> (s -> Word8 -> Maybe s) -> Parser B.ByteString
-scan s0 p = do
-  chunks <- go [] s0
-  case chunks of
-    [x] -> return x
-    xs  -> return $! B.concat $ reverse xs
+scan_ :: (s -> [B.ByteString] -> Parser r) -> s -> (s -> Word8 -> Maybe s)
+         -> Parser r
+scan_ f s0 p = go [] s0
  where
   go acc s1 = do
     let scanner (B.PS fp off len) =
         input <- wantInput
         if input
           then go (h:acc) s'
-          else return (h:acc)
-      else return (h:acc)
+          else f s' (h:acc)
+      else f s' (h:acc)
+{-# INLINE scan_ #-}
+
+-- | A stateful scanner.  The predicate consumes and transforms a
+-- state argument, and each transformed state is passed to successive
+-- invocations of the predicate on each byte of the input until one
+-- returns 'Nothing' or the input ends.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'Nothing' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+scan :: s -> (s -> Word8 -> Maybe s) -> Parser B.ByteString
+scan = scan_ $ \_ chunks ->
+  case chunks of
+    [x] -> return x
+    xs  -> return $! B.concat $ reverse xs
 {-# INLINE scan #-}
 
+-- | Like 'scan', but generalized to return the final state of the
+-- scanner.
+runScanner :: s -> (s -> Word8 -> Maybe s) -> Parser (B.ByteString, s)
+runScanner = scan_ $ \s xs -> return (B.concat (reverse xs), s)
+{-# INLINE runScanner #-}
+
 -- | Consume input as long as the predicate returns 'True', and return
 -- the consumed input.
 --
 notWord8 c = satisfy (/= c) <?> "not " ++ show c
 {-# INLINE notWord8 #-}
 
--- | Match any byte. Returns 'Nothing' if end of input has been
--- reached. Does not consume any input.
+-- | Match any byte, to perform lookahead. Returns 'Nothing' if end of
+-- input has been reached. Does not consume any input.
 --
 -- /Note/: Because this parser does not fail, do not use it with
 -- combinators such as 'many', because such parsers loop until a
                  in ks i0 a0 m0 (Just w)
 {-# INLINE peekWord8 #-}
 
--- | Match only if all input has been consumed.
-endOfInput :: Parser ()
-endOfInput = T.Parser $ \i0 a0 m0 kf ks ->
-             if B.null (unI i0)
-             then if m0 == Complete
-                  then ks i0 a0 m0 ()
-                  else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
-                                              \ i2 a2 m2 -> ks i2 a2 m2 ()
-                           ks' i1 a1 m1 _   = addS i0 a0 m0 i1 a1 m1 $
-                                              \ i2 a2 m2 -> kf i2 a2 m2 []
-                                                            "endOfInput"
-                       in  T.runParser demandInput i0 a0 m0 kf' ks'
-             else kf i0 a0 m0 [] "endOfInput"
-
--- | Return an indication of whether the end of input has been
--- reached.
-atEnd :: Parser Bool
-atEnd = not <$> wantInput
-{-# INLINE atEnd #-}
+-- | Match any byte, to perform lookahead.  Does not consume any
+-- input, but will fail if end of input has been reached.
+peekWord8' :: Parser Word8
+peekWord8' = do
+  s <- ensure 1
+  return $! B.unsafeHead s
+{-# INLINE peekWord8' #-}
 
 -- | Match either a single newline character @\'\\n\'@, or a carriage
 -- return followed by a newline character @\"\\r\\n\"@.
 endOfLine :: Parser ()
 endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ())
 
--- | Name the parser, in case failure occurs.
-(<?>) :: Parser a
-      -> String                 -- ^ the name to use if parsing fails
-      -> Parser a
-p <?> msg0 = T.Parser $ \i0 a0 m0 kf ks ->
-             let kf' i a m strs msg = kf i a m (msg0:strs) msg
-             in T.runParser p i0 a0 m0 kf' ks
-{-# INLINE (<?>) #-}
-infix 0 <?>
-
 -- | Terminal failure continuation.
 failK :: Failure a
 failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg
 -- particular, you should do no memory allocation inside an
 -- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
 inlinePerformIO :: IO a -> a
-#if defined(__GLASGOW_HASKELL__)
 inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
-#else
-inlinePerformIO = unsafePerformIO
-#endif
 {-# INLINE inlinePerformIO #-}

File Data/Attoparsec/ByteString/Lazy.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient combinator parsing for lazy 'ByteString'
--- strings, loosely based on the Parsec library.
+-- Simple, efficient combinator parsing that can consume lazy
+-- 'ByteString' strings, loosely based on the Parsec library.
 --
 -- This is essentially the same code as in the 'Data.Attoparsec'
 -- module, only with a 'parse' function that can consume a lazy
 -- more input to be fed in.  Think of this as suitable for use with a
 -- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'.
 --
--- Behind the scenes, strict 'B.ByteString' values are still used
--- internally to store parser input and manipulate it efficiently.
--- High-performance parsers such as 'string' still expect strict
--- 'B.ByteString' parameters.
+-- /Note:/ The various parser functions and combinators such as
+-- 'string' still expect /strict/ 'B.ByteString' parameters, and
+-- return strict 'B.ByteString' results.  Behind the scenes, strict
+-- 'B.ByteString' values are still used internally to store parser
+-- input and manipulate it efficiently.
 
 module Data.Attoparsec.ByteString.Lazy
     (

File Data/Attoparsec/Combinator.hs

 -- Useful parser combinators, similar to those provided by Parsec.
 module Data.Attoparsec.Combinator
     (
-      choice
+    -- * Combinators
+      try
+    , (<?>)
+    , choice
     , count
     , option
     , many'
     , skipMany
     , skipMany1
     , eitherP
+    -- * Parsing individual chunk elements
+    , satisfyElem
+    -- * State observation and manipulation functions
+    , endOfInput
+    , atEnd
     ) where
 
 import Control.Applicative (Alternative(..), Applicative(..), empty, liftA2,
 import Control.Applicative (many)
 #endif
 
-#if __GLASGOW_HASKELL__ >= 700
-import Data.Attoparsec.Internal.Types (Parser)
-import qualified Data.Attoparsec.Zepto as Z
+import Data.Attoparsec.Internal (demandInput, ensure, put, wantInput)
+import Data.Attoparsec.Internal.Types (Chunk(..), Input(..), Parser(..), addS)
+import Data.Attoparsec.Internal.Types (More(..))
 import Data.ByteString (ByteString)
 import Data.Text (Text)
-#endif
+import qualified Data.Attoparsec.Zepto as Z
+
+-- | Attempt a parse, and if it fails, rewind the input so that no
+-- input appears to have been consumed.
+--
+-- This combinator is provided for compatibility with Parsec.
+-- Attoparsec parsers always backtrack on failure.
+try :: Parser t a -> Parser t a
+try p = p
+{-# INLINE try #-}
+
+-- | Name the parser, in case failure occurs.
+(<?>) :: Parser t a
+      -> String                 -- ^ the name to use if parsing fails
+      -> Parser t a
+p <?> msg0 = Parser $ \i0 a0 m0 kf ks ->
+             let kf' i a m strs msg = kf i a m (msg0:strs) msg
+             in runParser p i0 a0 m0 kf' ks
+{-# INLINE (<?>) #-}
+infix 0 <?>
 
 -- | @choice ps@ tries to apply the actions in the list @ps@ in order,
 -- until one of them succeeds. Returns the value of the succeeding
 -- action.
 choice :: Alternative f => [f a] -> f a
 choice = foldr (<|>) empty
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE choice :: [Parser ByteString a] -> Parser ByteString a #-}
 {-# SPECIALIZE choice :: [Parser Text a] -> Parser Text a #-}
 {-# SPECIALIZE choice :: [Z.Parser a] -> Z.Parser a #-}
-#endif
 
 -- | @option x p@ tries to apply action @p@. If @p@ fails without
 -- consuming input, it returns the value @x@, otherwise the value
 -- > priority  = option 0 (digitToInt <$> digit)
 option :: Alternative f => a -> f a -> f a
 option x p = p <|> pure x
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE option :: a -> Parser ByteString a -> Parser ByteString a #-}
 {-# SPECIALIZE option :: a -> Parser Text a -> Parser Text a #-}
 {-# SPECIALIZE option :: a -> Z.Parser a -> Z.Parser a #-}
-#endif
 
 -- | A version of 'liftM2' that is strict in the result of its first
 -- action.
 -- > commaSep p  = p `sepBy` (symbol ",")
 sepBy :: Alternative f => f a -> f s -> f [a]
 sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure []
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE sepBy :: Parser ByteString a -> Parser ByteString s
                      -> Parser ByteString [a] #-}
 {-# SPECIALIZE sepBy :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
 {-# SPECIALIZE sepBy :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
-#endif
 
 -- | @sepBy' p sep@ applies /zero/ or more occurrences of @p@, separated
 -- by @sep@. Returns a list of the values returned by @p@. The value
 sepBy' :: (MonadPlus m) => m a -> m s -> m [a]
 sepBy' p s = scan `mplus` return []
   where scan = liftM2' (:) p ((s >> sepBy1' p s) `mplus` return [])
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE sepBy' :: Parser ByteString a -> Parser ByteString s
                       -> Parser ByteString [a] #-}
 {-# SPECIALIZE sepBy' :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
 {-# SPECIALIZE sepBy' :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
-#endif
 
 -- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated
 -- by @sep@. Returns a list of the values returned by @p@.
 sepBy1 :: Alternative f => f a -> f s -> f [a]
 sepBy1 p s = scan
     where scan = liftA2 (:) p ((s *> scan) <|> pure [])
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE sepBy1 :: Parser ByteString a -> Parser ByteString s
                       -> Parser ByteString [a] #-}
 {-# SPECIALIZE sepBy1 :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
 {-# SPECIALIZE sepBy1 :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
-#endif
 
 -- | @sepBy1' p sep@ applies /one/ or more occurrences of @p@, separated
 -- by @sep@. Returns a list of the values returned by @p@. The value
 sepBy1' :: (MonadPlus m) => m a -> m s -> m [a]
 sepBy1' p s = scan
     where scan = liftM2' (:) p ((s >> scan) `mplus` return [])
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE sepBy1' :: Parser ByteString a -> Parser ByteString s
                        -> Parser ByteString [a] #-}
 {-# SPECIALIZE sepBy1' :: Parser Text a -> Parser Text s -> Parser Text [a] #-}
 {-# SPECIALIZE sepBy1' :: Z.Parser a -> Z.Parser s -> Z.Parser [a] #-}
-#endif
 
 -- | @manyTill p end@ applies action @p@ /zero/ or more times until
 -- action @end@ succeeds, and returns the list of values returned by
 -- @p@.  This can be used to scan comments:
 --
--- >  simpleComment   = string "<!--" *> manyTill anyChar (try (string "-->"))
+-- >  simpleComment   = string "<!--" *> manyTill anyChar (string "-->")
 --
--- Note the overlapping parsers @anyChar@ and @string \"<!--\"@, and
--- therefore the use of the 'try' combinator.
+-- (Note the overlapping parsers @anyChar@ and @string \"-->\"@.
+-- While this will work, it is not very efficient, as it will cause a
+-- lot of backtracking.)
 manyTill :: Alternative f => f a -> f b -> f [a]
 manyTill p end = scan
     where scan = (end *> pure []) <|> liftA2 (:) p scan
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE manyTill :: Parser ByteString a -> Parser ByteString b
                         -> Parser ByteString [a] #-}
 {-# SPECIALIZE manyTill :: Parser Text a -> Parser Text b -> Parser Text [a] #-}
 {-# SPECIALIZE manyTill :: Z.Parser a -> Z.Parser b -> Z.Parser [a] #-}
-#endif
 
 -- | @manyTill' p end@ applies action @p@ /zero/ or more times until
 -- action @end@ succeeds, and returns the list of values returned by
 -- @p@.  This can be used to scan comments:
 --
--- >  simpleComment   = string "<!--" *> manyTill' anyChar (try (string "-->"))
+-- >  simpleComment   = string "<!--" *> manyTill' anyChar (string "-->")
 --
--- Note the overlapping parsers @anyChar@ and @string \"<!--\"@, and
--- therefore the use of the 'try' combinator. The value returned by @p@
--- is forced to WHNF.
+-- (Note the overlapping parsers @anyChar@ and @string \"-->\"@.
+-- While this will work, it is not very efficient, as it will cause a
+-- lot of backtracking.)
+--
+-- The value returned by @p@ is forced to WHNF.
 manyTill' :: (MonadPlus m) => m a -> m b -> m [a]
 manyTill' p end = scan
     where scan = (end >> return []) `mplus` liftM2' (:) p scan
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE manyTill' :: Parser ByteString a -> Parser ByteString b
                          -> Parser ByteString [a] #-}
 {-# SPECIALIZE manyTill' :: Parser Text a -> Parser Text b -> Parser Text [a] #-}
 {-# SPECIALIZE manyTill' :: Z.Parser a -> Z.Parser b -> Z.Parser [a] #-}
-#endif
 
 -- | Skip zero or more instances of an action.
 skipMany :: Alternative f => f a -> f ()
 skipMany p = scan
     where scan = (p *> scan) <|> pure ()
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE skipMany :: Parser ByteString a -> Parser ByteString () #-}
 {-# SPECIALIZE skipMany :: Parser Text a -> Parser Text () #-}
 {-# SPECIALIZE skipMany :: Z.Parser a -> Z.Parser () #-}
-#endif
 
 -- | Skip one or more instances of an action.
 skipMany1 :: Alternative f => f a -> f ()
 skipMany1 p = p *> skipMany p
-#if __GLASGOW_HASKELL__ >= 700
 {-# SPECIALIZE skipMany1 :: Parser ByteString a -> Parser ByteString () #-}
 {-# SPECIALIZE skipMany1 :: Parser Text a -> Parser Text () #-}
 {-# SPECIALIZE skipMany1 :: Z.Parser a -> Z.Parser () #-}
-#endif
 
 -- | Apply the given action repeatedly, returning every result.
 count :: Monad m => Int -> m a -> m [a]
 eitherP :: (Alternative f) => f a -> f b -> f (Either a b)
 eitherP a b = (Left <$> a) <|> (Right <$> b)
 {-# INLINE eitherP #-}
+
+-- | The parser @satisfyElem p@ succeeds for any chunk element for which the
+-- predicate @p@ returns 'True'. Returns the element that is
+-- actually parsed.
+--
+-- >digit = satisfyElem isDigit
+-- >    where isDigit c = c >= '0' && c <= '9'
+satisfyElem :: Chunk t => (ChunkElem t -> Bool) -> Parser t (ChunkElem t)
+satisfyElem p = do
+  c <- ensure 1
+  let !h = unsafeChunkHead c
+  if p h
+    then put (unsafeChunkTail c) >> return h
+    else fail "satisfyElem"
+{-# INLINE satisfyElem #-}
+
+-- | Match only if all input has been consumed.
+endOfInput :: Chunk t => Parser t ()
+endOfInput = Parser $ \i0 a0 m0 kf ks ->
+             if nullChunk (unI i0)
+             then if m0 == Complete
+                  then ks i0 a0 m0 ()
+                  else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
+                                              \ i2 a2 m2 -> ks i2 a2 m2 ()
+                           ks' i1 a1 m1 _   = addS i0 a0 m0 i1 a1 m1 $
+                                              \ i2 a2 m2 -> kf i2 a2 m2 []
+                                                            "endOfInput"
+                       in  runParser demandInput i0 a0 m0 kf' ks'
+             else kf i0 a0 m0 [] "endOfInput"
+{-# SPECIALIZE endOfInput :: Parser ByteString () #-}
+{-# SPECIALIZE endOfInput :: Parser Text () #-}
+
+-- | Return an indication of whether the end of input has been
+-- reached.
+atEnd :: Chunk t => Parser t Bool
+atEnd = not <$> wantInput
+{-# INLINE atEnd #-}

File Data/Attoparsec/Internal.hs

+{-# LANGUAGE BangPatterns #-}
 -- |
 -- Module      :  Data.Attoparsec.Internal
 -- Copyright   :  Bryan O'Sullivan 2012
 module Data.Attoparsec.Internal
     (
       compareResults
+    , get
+    , put
+    , ensure
+    , prompt
+    , demandInput
+    , wantInput
     ) where
 
-import Data.Attoparsec.Internal.Types (IResult(..))
+import Data.Attoparsec.Internal.Types
+import Data.ByteString (ByteString)
+import Data.Text (Text)
 
 -- | Compare two 'IResult' values for equality.
 --
     Just (i0 == i1 && r0 == r1)
 compareResults (Partial _) (Partial _) = Nothing
 compareResults _ _ = Just False
+
+get :: Parser t t
+get = Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)
+{-# INLINE get #-}
+
+put :: t -> Parser t ()
+put c = Parser $ \_i0 a0 m0 _kf ks -> ks (I c) a0 m0 ()
+{-# INLINE put #-}
+
+ensure' :: Chunk t
+        => Int -> Input t -> Added t -> More -> Failure t r -> Success t t r
+        -> IResult t r
+ensure' !n0 i0 a0 m0 kf0 ks0 =
+    runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0
+  where
+    go !n = Parser $ \i a m kf ks ->
+        if chunkLengthAtLeast (unI i) n
+        then ks i a m (unI i)
+        else runParser (demandInput >> go n) i a m kf ks
+{-# SPECIALIZE ensure' :: Int -> Input ByteString -> Added ByteString -> More
+                       -> Failure ByteString r
+                       -> Success ByteString ByteString r
+                       -> IResult ByteString r #-}
+{-# SPECIALIZE ensure' :: Int -> Input Text -> Added Text -> More
+                       -> Failure Text r -> Success Text Text r
+                       -> IResult Text r #-}
+
+-- | If at least @n@ elements of input are available, return the
+-- current input, otherwise fail.
+ensure :: Chunk t => Int -> Parser t t
+ensure !n = Parser $ \i0 a0 m0 kf ks ->
+    if chunkLengthAtLeast (unI i0) n
+    then ks i0 a0 m0 (unI i0)
+    -- The uncommon case is kept out-of-line to reduce code size:
+    else ensure' n i0 a0 m0 kf ks
+-- Non-recursive so the bounds check can be inlined:
+{-# INLINE ensure #-}
+
+-- | Ask for input.  If we receive any, pass it to a success
+-- continuation, otherwise to a failure continuation.
+prompt :: Chunk t
+       => Input t -> Added t -> More
+       -> (Input t -> Added t -> More -> IResult t r)
+       -> (Input t -> Added t -> More -> IResult t r)
+       -> IResult t r
+prompt i0 a0 _m0 kf ks = Partial $ \s ->
+    if nullChunk s
+    then kf i0 a0 Complete
+    else ks (i0 <> I s) (a0 <> A s) Incomplete
+{-# SPECIALIZE prompt :: Input ByteString -> Added ByteString -> More
+                      -> (Input ByteString -> Added ByteString -> More
+                          -> IResult ByteString r)
+                      -> (Input ByteString -> Added ByteString -> More
+                          -> IResult ByteString r)
+                      -> IResult ByteString r #-}
+{-# SPECIALIZE prompt :: Input Text -> Added Text -> More
+                      -> (Input Text -> Added Text -> More -> IResult Text r)
+                      -> (Input Text -> Added Text-> More -> IResult Text r)
+                      -> IResult Text r #-}
+
+-- | Immediately demand more input via a 'Partial' continuation
+-- result.
+demandInput :: Chunk t => Parser t ()
+demandInput = Parser $ \i0 a0 m0 kf ks ->
+    if m0 == Complete
+    then kf i0 a0 m0 ["demandInput"] "not enough input"
+    else let kf' i a m = kf i a m ["demandInput"] "not enough input"
+             ks' i a m = ks i a m ()
+         in prompt i0 a0 m0 kf' ks'
+{-# SPECIALIZE demandInput :: Parser ByteString () #-}
+{-# SPECIALIZE demandInput :: Parser Text () #-}
+
+-- | This parser always succeeds.  It returns 'True' if any input is
+-- available either immediately or on demand, and 'False' if the end
+-- of all input has been reached.
+wantInput :: Chunk t => Parser t Bool
+wantInput = Parser $ \i0 a0 m0 _kf ks ->
+  case () of
+    _ | not (nullChunk (unI i0)) -> ks i0 a0 m0 True
+      | m0 == Complete  -> ks i0 a0 m0 False
+      | otherwise       -> let kf' i a m = ks i a m False
+                               ks' i a m = ks i a m True
+                           in prompt i0 a0 m0 kf' ks'
+{-# SPECIALIZE wantInput :: Parser ByteString Bool #-}
+{-# SPECIALIZE wantInput :: Parser Text Bool #-}

File Data/Attoparsec/Internal/Types.hs

 {-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving, OverloadedStrings,
-    Rank2Types, RecordWildCards #-}
+    Rank2Types, RecordWildCards, TypeFamilies #-}
 -- |
 -- Module      :  Data.Attoparsec.Internal.Types
 -- Copyright   :  Bryan O'Sullivan 2007-2011
     , More(..)
     , addS
     , (<>)
+    , Chunk(..)
     ) where
 
 import Control.Applicative (Alternative(..), Applicative(..), (<$>))
 import Control.DeepSeq (NFData(rnf))
 import Control.Monad (MonadPlus(..))
+import Data.ByteString (ByteString)
+import Data.ByteString.Internal (w2c)
 import Data.Monoid (Monoid(..))
+import Data.Text (Text)
+import Data.Word (Word8)
 import Prelude hiding (getChar, take, takeWhile)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Unsafe as BS
+import qualified Data.Text as T
+import qualified Data.Text.Unsafe as T
 
 -- | The result of a parse.  This is parameterised over the type @t@
 -- of string that was processed.
                                   \ i2 a2 m2 -> runParser b i2 a2 m2 kf ks
                ks' i1 a1 m1 = ks i1 (a0 <> a1) m1
            in  noAdds i0 a0 m0 $ \i2 a2 m2 -> runParser a i2 a2 m2 kf' ks'
-{-# INLINE plus #-}
 
 instance (Monoid t) => MonadPlus (Parser t) where
     mzero = failDesc "mzero"
 (<>) :: (Monoid m) => m -> m -> m
 (<>) = mappend
 {-# INLINE (<>) #-}
+
+-- | A common interface for input chunks.
+class Monoid c => Chunk c where
+  type ChunkElem c
+  -- | Test if the chunk is empty.
+  nullChunk :: c -> Bool
+  -- | Get the head element of a non-empty chunk.
+  unsafeChunkHead :: c -> ChunkElem c
+  -- | Get the tail of a non-empty chunk.
+  unsafeChunkTail :: c -> c
+  -- | Check whether the chunk contains at least @n@ elements.
+  chunkLengthAtLeast :: c -> Int -> Bool
+  -- | Map an element to the corresponding character.
+  --   The first argument is ignored; it only selects the instance.
+  chunkElemToChar :: c -> ChunkElem c -> Char
+
+instance Chunk ByteString where
+  -- Elements of a strict 'ByteString' chunk are raw bytes.
+  type ChunkElem ByteString = Word8
+  nullChunk = BS.null
+  {-# INLINE nullChunk #-}
+  -- The unchecked variants from "Data.ByteString.Unsafe" are used for
+  -- head and tail; they must only be applied to non-empty chunks.
+  unsafeChunkHead = BS.unsafeHead
+  {-# INLINE unsafeChunkHead #-}
+  unsafeChunkTail = BS.unsafeTail
+  {-# INLINE unsafeChunkTail #-}
+  chunkLengthAtLeast bs n = BS.length bs >= n
+  {-# INLINE chunkLengthAtLeast #-}
+  -- The chunk argument is discarded by 'const'; each byte is converted
+  -- with 'w2c' (presumably the Char with the same code, i.e. a Latin-1
+  -- reading of the byte -- confirm against "Data.ByteString.Internal").
+  chunkElemToChar = const w2c
+  {-# INLINE chunkElemToChar #-}
+
+instance Chunk Text where
+  -- Elements of a strict 'Text' chunk are Unicode characters.
+  type ChunkElem Text = Char
+  nullChunk = T.null
+  {-# INLINE nullChunk #-}
+  -- The unchecked variants from "Data.Text.Unsafe" are used for head
+  -- and tail; they must only be applied to non-empty chunks.
+  unsafeChunkHead = T.unsafeHead
+  {-# INLINE unsafeChunkHead #-}
+  unsafeChunkTail = T.unsafeTail
+  {-# INLINE unsafeChunkTail #-}
+  -- Two-stage test: first compare half of the 'Word16' unit count with
+  -- @n@, and only fall back to the full 'T.length' when that fails.
+  -- The shortcut is sound if a 'Char' occupies at most two 'Word16'
+  -- units, so that @2 * n@ units imply at least @n@ characters.
+  -- NOTE(review): this assumes the UTF-16 internal representation of
+  -- 'Text'; confirm against the supported range of the text package.
+  chunkLengthAtLeast t n = T.lengthWord16 t `quot` 2 >= n || T.length t >= n
+  {-# INLINE chunkLengthAtLeast #-}
+  -- Elements are already characters, so the conversion is the
+  -- identity; the chunk argument is discarded by 'const'.
+  chunkElemToChar = const id
+  {-# INLINE chunkElemToChar #-}

File Data/Attoparsec/Text.hs

     , eitherResult
 
     -- * Combinators
-    , (I.<?>)
-    , I.try
     , module Data.Attoparsec.Combinator
 
     -- * Parsing individual characters
     , I.satisfy
     , I.satisfyWith
     , I.skip
+
+    -- ** Lookahead
     , I.peekChar
+    , I.peekChar'
 
     -- ** Special character parsers
     , digit
     , Number(..)
     , number
     , rational
-
-    -- * State observation and manipulation functions
-    , I.endOfInput
-    , I.atEnd
+    , scientific
     ) where
 
 import Control.Applicative (pure, (<$>), (*>), (<*), (<|>))
 import Data.Attoparsec.Combinator
 import Data.Attoparsec.Number (Number(..))
-import Data.Scientific (Scientific, scientific, coefficient, base10Exponent)
-import Data.Attoparsec.Text.Internal ((<?>), Parser, Result, parse, takeWhile1)
+import Data.Scientific (Scientific, coefficient, base10Exponent)
+import qualified Data.Scientific as Sci (scientific)
+import Data.Attoparsec.Text.Internal (Parser, Result, parse, takeWhile1)
 import Data.Bits (Bits, (.|.), shiftL)
 import Data.Char (isAlpha, isDigit, isSpace, ord)
 import Data.Int (Int8, Int16, Int32, Int64)
                then I (c * 10 ^ e)
                else D (fromInteger c / 10 ^ negate e)
 
+-- | Parse a scientific number.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+scientific :: Parser Scientific
+scientific = scientifically id
+
 {-# INLINE scientifically #-}
 scientifically :: (Scientific -> a) -> Parser a
 scientifically h = do
 
   n <- decimal
 
-  let f fracDigits = scientific (T.foldl' step n fracDigits)
-                                (negate $ T.length fracDigits)
+  let f fracDigits = Sci.scientific (T.foldl' step n fracDigits)
+                                    (negate $ T.length fracDigits)
       step a c = a * 10 + fromIntegral (ord c - 48)
 
   s <- (I.satisfy (=='.') *> (f <$> I.takeWhile isDigit)) <|>
-         pure (scientific n 0)
+         pure (Sci.scientific n 0)
 
   let !signedCoeff | positive  =          coefficient s
                    | otherwise = negate $ coefficient s
 
   (I.satisfy (\c -> c == 'e' || c == 'E') *>
-      fmap (h . scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
-    return (h $ scientific signedCoeff   (base10Exponent s))
+      fmap (h . Sci.scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
+    return (h $ Sci.scientific signedCoeff   (base10Exponent s))
 
 -- | Parse a single digit, as recognised by 'isDigit'.
 digit :: Parser Char

File Data/Attoparsec/Text/Internal.hs

     , parseOnly
 
     -- * Combinators
-    , (<?>)
-    , try
     , module Data.Attoparsec.Combinator
 
     -- * Parsing individual characters
     , skip
     , char
     , notChar
+
+    -- ** Lookahead
     , peekChar
+    , peekChar'
 
     -- ** Character classes
     , inClass
     , asciiCI
     , take
     , scan
+    , runScanner
     , takeWhile
     , takeWhile1
     , takeTill
     , takeText
     , takeLazyText
 
-    -- * State observation and manipulation functions
-    , endOfInput
-    , atEnd
-
     -- * Utilities
     , endOfLine
     ) where
 import Control.Monad (when)
 import Data.Attoparsec.Combinator
 import Data.Attoparsec.Internal.Types hiding (Parser, Input, Added, Failure, Success)
+import Data.Attoparsec.Internal
 import Data.Monoid (Monoid(..))
 import Data.String (IsString(..))
 import Data.Text (Text)
 import qualified Data.Attoparsec.Internal.Types as T
 import qualified Data.Attoparsec.Text.FastSet as Set
 import qualified Data.Text as T
-import qualified Data.Text.Internal as T
 import qualified Data.Text.Lazy as L
 
 type Parser = T.Parser Text
 type Result = IResult Text
-type Input = T.Input Text
-type Added = T.Added Text
 type Failure r = T.Failure Text r
 type Success a r = T.Success Text a r
 
 instance (a ~ Text) => IsString (Parser a) where
     fromString = string . T.pack
 
-lengthAtLeast :: T.Text -> Int -> Bool
-lengthAtLeast t@(T.Text _ _ len) n = (len `quot` 2) >= n || T.length t >= n
-{-# INLINE lengthAtLeast #-}
-
--- | If at least @n@ characters of input are available, return the
--- current input, otherwise fail.
-ensure :: Int -> Parser Text
-ensure !n = T.Parser $ \i0 a0 m0 kf ks ->
-    if lengthAtLeast (unI i0) n
-    then ks i0 a0 m0 (unI i0)
-    else runParser (demandInput >> go n) i0 a0 m0 kf ks
-  where
-    go n' = T.Parser $ \i0 a0 m0 kf ks ->
-        if lengthAtLeast (unI i0) n'
-        then ks i0 a0 m0 (unI i0)
-        else runParser (demandInput >> go n') i0 a0 m0 kf ks
-{-# INLINE ensure #-}
-
--- | Ask for input.  If we receive any, pass it to a success
--- continuation, otherwise to a failure continuation.
-prompt :: Input -> Added -> More
-       -> (Input -> Added -> More -> Result r)
-       -> (Input -> Added -> More -> Result r)
-       -> Result r
-prompt i0 a0 _m0 kf ks = Partial $ \s ->
-    if T.null s
-    then kf i0 a0 Complete
-    else ks (i0 <> I s) (a0 <> A s) Incomplete
-
--- | Immediately demand more input via a 'Partial' continuation
--- result.
-demandInput :: Parser ()
-demandInput = T.Parser $ \i0 a0 m0 kf ks ->
-    if m0 == Complete
-    then kf i0 a0 m0 ["demandInput"] "not enough input"
-    else let kf' i a m = kf i a m ["demandInput"] "not enough input"
-             ks' i a m = ks i a m ()
-         in prompt i0 a0 m0 kf' ks'
-
--- | This parser always succeeds.  It returns 'True' if any input is
--- available either immediately or on demand, and 'False' if the end
--- of all input has been reached.
-wantInput :: Parser Bool
-wantInput = T.Parser $ \i0 a0 m0 _kf ks ->
-  case () of
-    _ | not (T.null (unI i0)) -> ks i0 a0 m0 True
-      | m0 == Complete  -> ks i0 a0 m0 False
-      | otherwise       -> let kf' i a m = ks i a m False
-                               ks' i a m = ks i a m True
-                           in prompt i0 a0 m0 kf' ks'
-
-get :: Parser Text
-get  = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)
-
-put :: Text -> Parser ()
-put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()
-
--- | Attempt a parse, and if it fails, rewind the input so that no
--- input appears to have been consumed.
---
--- This combinator is provided for compatibility with Parsec.
--- Attoparsec parsers always backtrack on failure.
-try :: Parser a -> Parser a
-try p = p
-{-# INLINE try #-}
-
 unsafeHead :: Text -> Char
 unsafeHead = T.head
 
 -- >digit = satisfy isDigit
 -- >    where isDigit c = c >= '0' && c <= '9'
 satisfy :: (Char -> Bool) -> Parser Char
-satisfy p = do
-  s <- ensure 1
-  let !w = unsafeHead s
-  if p w
-    then put (unsafeTail s) >> return w
-    else fail "satisfy"
+satisfy = satisfyElem
 {-# INLINE satisfy #-}
 
 -- | The parser @skip p@ succeeds for any character for which the
 data Scan s = Continue s
             | Finished {-# UNPACK #-} !Int T.Text
 
--- | A stateful scanner.  The predicate consumes and transforms a
--- state argument, and each transformed state is passed to successive
--- invocations of the predicate on each character of the input until one
--- returns 'Nothing' or the input ends.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'Nothing' on the first character of input.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-scan :: s -> (s -> Char -> Maybe s) -> Parser Text
-scan s0 p = do
-  chunks <- go [] s0
-  case chunks of
-    [x] -> return x
-    xs  -> return . T.concat . reverse $ xs
+scan_ :: (s -> [Text] -> Parser r) -> s -> (s -> Char -> Maybe s) -> Parser r
+scan_ f s0 p = go [] s0
  where
   scanner s !n t =
     case T.uncons t of
                          more <- wantInput
                          if more
                            then go (input : acc) s'
-                           else return (input : acc)
-      Finished n t -> put t >> return (T.take n input : acc)
+                           else f s' (input : acc)
+      Finished n t -> put t >> f s (T.take n input : acc)
+{-# INLINE scan_ #-}
+
+-- | A stateful scanner.  The predicate consumes and transforms a
+-- state argument, and each transformed state is passed to successive
+-- invocations of the predicate on each character of the input until one
+-- returns 'Nothing' or the input ends.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'Nothing' on the first character of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+scan :: s -> (s -> Char -> Maybe s) -> Parser Text
+scan = scan_ $ \_ chunks ->
+  case chunks of
+    [x] -> return x
+    xs  -> return . T.concat . reverse $ xs
 {-# INLINE scan #-}
 
+-- | Like 'scan', but generalized to return the final state of the
+-- scanner.
+runScanner :: s -> (s -> Char -> Maybe s) -> Parser (Text, s)
+runScanner = scan_ $ \s xs -> return (T.concat (reverse xs), s)
+{-# INLINE runScanner #-}
+
 -- | Consume input as long as the predicate returns 'True', and return
 -- the consumed input.
 --
 notChar c = satisfy (/= c) <?> "not " ++ show c
 {-# INLINE notChar #-}
 
--- | Match any character. Returns 'Nothing' if end of input has been
--- reached. Does not consume any input.
+-- | Match any character, to perform lookahead. Returns 'Nothing' if
+-- end of input has been reached. Does not consume any input.
 --
 -- /Note/: Because this parser does not fail, do not use it with
 -- combinators such as 'many', because such parsers loop until a
                 in ks i0 a0 m0 (Just c)
 {-# INLINE peekChar #-}
 
--- | Match only if all input has been consumed.
-endOfInput :: Parser ()
-endOfInput = T.Parser $ \i0 a0 m0 kf ks ->
-             if T.null (unI i0)
-             then if m0 == Complete
-                  then ks i0 a0 m0 ()
-                  else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
-                                              \ i2 a2 m2 -> ks i2 a2 m2 ()
-                           ks' i1 a1 m1 _   = addS i0 a0 m0 i1 a1 m1 $
-                                              \ i2 a2 m2 -> kf i2 a2 m2 []
-                                                            "endOfInput"
-                       in  runParser demandInput i0 a0 m0 kf' ks'
-             else kf i0 a0 m0 [] "endOfInput"
-
--- | Return an indication of whether the end of input has been
--- reached.
-atEnd :: Parser Bool
-atEnd = not <$> wantInput
-{-# INLINE atEnd #-}
+-- | Match any character, to perform lookahead.  Does not consume any
+-- input, but will fail if end of input has been reached.
+peekChar' :: Parser Char
+peekChar' = do
+  s <- ensure 1
+  return $! unsafeHead s
+{-# INLINE peekChar' #-}
 
 -- | Match either a single newline character @\'\\n\'@, or a carriage
 -- return followed by a newline character @\"\\r\\n\"@.
 endOfLine :: Parser ()
 endOfLine = (char '\n' >> return ()) <|> (string "\r\n" >> return ())
 
--- | Name the parser, in case failure occurs.
-(<?>) :: Parser a
-      -> String                 -- ^ the name to use if parsing fails
-      -> Parser a
-p <?> msg0 = T.Parser $ \i0 a0 m0 kf ks ->
-             let kf' i a m strs msg = kf i a m (msg0:strs) msg
-             in runParser p i0 a0 m0 kf' ks
-{-# INLINE (<?>) #-}
-infix 0 <?>
-
 -- | Terminal failure continuation.
 failK :: Failure a
 failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg

File Data/Attoparsec/Text/Lazy.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient combinator parsing for lazy 'Text'
+-- Simple, efficient combinator parsing that can consume lazy 'Text'
 -- strings, loosely based on the Parsec library.
 --
 -- This is essentially the same code as in the 'Data.Attoparsec.Text'
 -- more input to be fed in.  Think of this as suitable for use with a
 -- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'.
 --
--- Behind the scenes, strict 'T.Text' values are still used
--- internally to store parser input and manipulate it efficiently.
--- High-performance parsers such as 'string' still expect strict
--- 'T.Text' parameters.
+-- /Note:/ The various parser functions and combinators such as
+-- 'string' still expect /strict/ 'T.Text' parameters, and return
+-- strict 'T.Text' results.  Behind the scenes, strict 'T.Text' values
+-- are still used internally to store parser input and manipulate it
+-- efficiently.
 
 module Data.Attoparsec.Text.Lazy
     (

File Data/Attoparsec/Types.hs

     (
       Parser
     , IResult(..)
+    , Chunk(..)
     ) where
 
-import Data.Attoparsec.Internal.Types (Parser(..), IResult(..))
+import Data.Attoparsec.Internal.Types (Parser(..), IResult(..), Chunk(..))

File attoparsec.cabal

 name:            attoparsec
-version:         0.11.0.0
+version:         0.11.3.0
 license:         BSD3
 license-file:    LICENSE
 category:        Text, Parsing
     file formats.
 extra-source-files:
     README.markdown
-    benchmarks/Benchmarks.hs
+    benchmarks/*.hs
     benchmarks/Makefile
-    benchmarks/Tiny.hs
     benchmarks/attoparsec-benchmarks.cabal
     benchmarks/med.txt.bz2
+    changelog.md
+    examples/*.c
+    examples/*.hs
     examples/Makefile
-    examples/Parsec_RFC2616.hs
-    examples/RFC2616.hs
-    examples/TestRFC2616.hs
-    examples/rfc2616.c
+    tests/*.hs
     tests/Makefile
-    tests/QC.hs
     tests/QC/*.hs
     tests/TestFastSet.hs
 
 Flag developer
   Description: Whether to build the library in development mode
   Default: False
+  Manual: True
 
 library
   build-depends: array,
                  containers,
                  deepseq,
                  monoid-subclasses < 0.4,
-                 text >= 0.11.1.5,
-                 scientific >= 0.0.0.2
+                 text >= 0.11.3.1,
+                 scientific >= 0.2
 
   exposed-modules: Data.Attoparsec
                    Data.Attoparsec.ByteString

File benchmarks/Alternative.hs

+{-# LANGUAGE OverloadedStrings #-}
+
+-- This benchmark reveals a huge performance regression that showed up
+-- under GHC 7.8.1 (https://github.com/bos/attoparsec/issues/56).
+--
+-- With GHC 7.6.3 and older, this program runs in 0.04 seconds.  Under
+-- GHC 7.8.1 with (<|>) inlined, time jumps to 12 seconds!
+
+import Control.Applicative
+import Data.Text (Text)
+import qualified Data.Attoparsec.Text as A
+import qualified Data.Text as T
+
+testParser :: Text -> Either String Int
+testParser f = fmap length -- avoid printing out the entire matched list
+        . A.parseOnly (many ((() <$ A.string "b") <|> (() <$ A.anyChar)))
+        $ f
+
+main :: IO ()
+main = print . testParser $ T.replicate 50000 "a"

File changelog.md

+0.11.3.0
+
+* New function scientific is compatible with rational, but parses
+  integers more efficiently (https://github.com/bos/aeson/issues/198)
+
+0.11.2.0
+
+* The new Chunk typeclass allows for some code sharing with Ed
+  Kmett's parsers package: http://hackage.haskell.org/package/parsers
+
+* New function runScanner generalises scan to return the final state
+  of the scanner as well as the input consumed.
+
+
+0.11.1.0
+
+* New dependency: the scientific package.  This allows us to parse
+  numbers much more efficiently.
+
+* peekWord8', peekChar': new primitive parsers that allow
+  single-character lookahead.