Bryan O'Sullivan avatar Bryan O'Sullivan committed 976437f Merge

Merge!

Comments (0)

Files changed (5)

Data/Attoparsec.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient parser combinators for 'B.ByteString' strings,
+-- Simple, efficient combinator parsing for 'B.ByteString' strings,
 -- loosely based on the Parsec library.
 
 module Data.Attoparsec
     (
+    -- * Differences from Parsec
+    -- $parsec
+
+    -- * Performance considerations
+    -- $performance
+
     -- * Parser types
       I.Parser
     , Result(..)
 
+    -- ** Typeclass instances
+    -- $instances
+
     -- * Running parsers
     , parse
     , feed
     , module Data.Attoparsec.Combinator
 
     -- * Parsing individual bytes
+    , I.word8
     , I.anyWord8
     , I.notWord8
-    , I.word8
     , I.satisfy
+    , I.satisfyWith
 
     -- ** Byte classes
     , I.inClass
     -- * Efficient string handling
     , I.string
     , I.skipWhile
-    , I.stringTransform
     , I.take
-    , I.takeTill
     , I.takeWhile
     , I.takeWhile1
+    , I.takeTill
 
     -- * State observation and manipulation functions
     , I.endOfInput
     , I.ensure
     ) where
 
+import Control.Applicative (Alternative(..), Applicative)
+import Control.Monad (MonadPlus(..))
 import Data.Attoparsec.Combinator
 import Prelude hiding (takeWhile)
 import qualified Data.Attoparsec.Internal as I
 import qualified Data.ByteString as B
 
+-- $parsec
+--
+-- Compared to Parsec 3, Attoparsec makes several tradeoffs.  It is
+-- not intended for, or ideal for, all possible uses.
+--
+-- * While Attoparsec can consume input incrementally, Parsec cannot.
+--   Incremental input is a huge deal for efficient and secure network
+--   and system programming, since it gives much more control to users
+--   of the library over matters such as resource usage and the I/O
+--   model to use.
+--
+-- * Much of the performance advantage of Attoparsec is gained via
+--   high-performance parsers such as 'I.takeWhile' and 'I.string'.
+--   If you use complicated combinators that return lists of bytes or
+--   characters, there really isn't much performance difference
+--   between the two libraries.
+--
+-- * Unlike Parsec 3, Attoparsec does not support being used as a
+--   monad transformer.  This is mostly a matter of the implementor
+--   not having needed that functionality.
+--
+-- * Attoparsec is specialised to deal only with strict 'B.ByteString'
+--   input.  Efficiency concerns rule out both lists and lazy
+--   bytestrings.  The usual use for lazy bytestrings would be to
+--   allow consumption of very large input without a large footprint.
+--   For this need, Attoparsec's incremental input provides an
+--   excellent substitute, with much more control over when input
+--   takes place.
+--
+-- * Parsec parsers can produce more helpful error messages than
+--   Attoparsec parsers.  This is a matter of focus: Attoparsec avoids
+--   the extra book-keeping in favour of higher performance.
+
+-- $performance
+--
+-- If you write an Attoparsec-based parser carefully, it can be
+-- realistic to expect it to perform within a factor of 2 of a
+-- hand-rolled C parser (measuring megabytes parsed per second).
+--
+-- To actually achieve high performance, there are a few guidelines
+-- that it is useful to follow.
+--
+-- Use the 'B.ByteString'-oriented parsers whenever possible,
+-- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'.  There is
+-- about a factor of 100 difference in performance between the two
+-- kinds of parser.
+--
+-- For very simple byte-testing predicates, write them by hand instead
+-- of using 'I.inClass' or 'I.notInClass'.  For instance, both of
+-- these predicates test for an end-of-line byte, but the first is
+-- much faster than the second:
+--
+-- >endOfLine_fast w = w == 13 || w == 10
+-- >endOfLine_slow   = inClass "\r\n"
+--
+-- Make active use of benchmarking and profiling tools to measure,
+-- find the problems with, and improve the performance of your parser.
+
+-- $instances
+--
+-- The 'I.Parser' type is an instance of the following classes:
+--
+-- * 'Monad', where 'fail' throws an exception (i.e. fails) with an
+--   error message.
+--
+-- * 'Functor' and 'Applicative', which follow the usual definitions.
+--
+-- * 'MonadPlus', where 'mzero' fails (with no error message) and
+--   'mplus' executes the right-hand parser if the left-hand one
+--   fails.
+--
+-- * 'Alternative', which follows 'MonadPlus'.
+--
+-- The 'Result' type is an instance of 'Functor', where 'fmap'
+-- transforms the value in a 'Done' result.
+
 -- | The result of a parse.
 data Result r = Fail !B.ByteString [String] String
               -- ^ The parse failed.  The 'B.ByteString' is the input
               -- in which the error occurred.  The 'String' is the
               -- message describing the error, if any.
               | Partial (B.ByteString -> Result r)
-              -- ^ Pass this continuation more input so that the
-              -- parser can resume.  Pass it an 'B.empty' string to
-              -- indicate that no more input is available.
+              -- ^ Supply this continuation with more input so that
+              -- the parser can resume.  To indicate that no more
+              -- input is available, use an 'B.empty' string.
               | Done !B.ByteString r
               -- ^ The parse succeeded.  The 'B.ByteString' is the
               -- input that had not yet been consumed (if any) when

Data/Attoparsec/Char8.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient, character-oriented parser combinators for
+-- Simple, efficient, character-oriented combinator parsing for
 -- 'B.ByteString' strings, loosely based on the Parsec library.
--- 
--- /Note/: This module is intended for parsing text that is
--- represented using an 8-bit character set, e.g. ASCII or
--- ISO-8859-15.  It /does not/ deal with character encodings,
--- multibyte characters, or wide characters.  Any attempts to use
--- characters above code point 255 will give wrong answers.
+
 module Data.Attoparsec.Char8
     (
+    -- * Character encodings
+    -- $encodings
+
     -- * Parser types
       Parser
     , A.Result(..)
     , module Data.Attoparsec.Combinator
 
     -- * Parsing individual characters
+    , satisfy
+    , char
     , anyChar
-    , char
     , char8
+    , notChar
+
+    -- ** Special character parsers
     , digit
-    , letter
-    , notChar
+    , letter_iso8859_15
+    , letter_ascii
     , space
-    , satisfy
 
-    -- ** Character classes
+    -- ** Fast predicates
+    , isDigit
+    , isAlpha_iso8859_15
+    , isAlpha_ascii
+
+    -- *** Character classes
     , inClass
     , notInClass
 
     , stringCI
     , skipSpace
     , skipWhile
-    , take
+    , I.take
     , takeTill
     , takeWhile
     , takeWhile1
     , isHorizontalSpace
 
     -- * Numeric parsers
-    , hexNumber
-    --, int
-    --, integer
+    , decimal
+    , hexadecimal
+    , signed
     --, double
 
     -- * State observation and manipulation functions
     , I.ensure
     ) where
 
+import Control.Applicative ((*>), (<$>), (<|>))
 import Data.Attoparsec.Combinator
 import Data.Attoparsec.FastSet (charClass, memberChar)
 import Data.Attoparsec.Internal (Parser, (<?>))
 import Data.ByteString.Internal (c2w, w2c)
--- import Data.ByteString.Lex.Double (readDouble)
 import Data.Word (Word8)
 import Prelude hiding (takeWhile)
 import qualified Data.Attoparsec as A
 import qualified Data.Attoparsec.Internal as I
+import qualified Data.ByteString as B8
 import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString as B8
 
+-- $encodings
+--
+-- This module is intended for parsing text that is
+-- represented using an 8-bit character set, e.g. ASCII or
+-- ISO-8859-15.  It /does not/ make any attempt to deal with character
+-- encodings, multibyte characters, or wide characters.  In
+-- particular, all attempts to use characters above code point U+00FF
+-- will give wrong answers.
+--
+-- Code points below U+0100 are simply translated to and from their
+-- numeric values, so e.g. the code point U+00A4 becomes the byte
+-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
+-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
+-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.
+
+-- ASCII-specific but fast, oh yes.
 toLower :: Word8 -> Word8
 toLower w | w >= 65 && w <= 90 = w + 32
           | otherwise          = w
 stringCI = I.stringTransform (B8.map toLower)
 {-# INLINE stringCI #-}
 
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser requires the predicate to succeed on at least one byte
+-- of input: it will fail if the predicate never returns 'True' or if
+-- there is no input left.
 takeWhile1 :: (Char -> Bool) -> Parser B.ByteString
 takeWhile1 p = I.takeWhile1 (p . w2c)
 {-# INLINE takeWhile1 #-}
 
--- | Character parser.
+-- | The parser @satisfy p@ succeeds for any byte for which the
+-- predicate @p@ returns 'True'. Returns the byte that is actually
+-- parsed.
+--
+-- >digit = satisfy isDigit
+-- >    where isDigit c = c >= '0' && c <= '9'
 satisfy :: (Char -> Bool) -> Parser Char
 satisfy = I.satisfyWith w2c
 {-# INLINE satisfy #-}
 
-letter :: Parser Char
-letter = satisfy isLetter <?> "letter"
-  where isLetter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-{-# INLINE letter #-}
+-- | Match a letter, in the ISO-8859-15 encoding.
+letter_iso8859_15 :: Parser Char
+letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15"
+{-# INLINE letter_iso8859_15 #-}
 
+-- | Match a letter, in the ASCII encoding.
+letter_ascii :: Parser Char
+letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii"
+{-# INLINE letter_ascii #-}
+
+-- | A fast alphabetic predicate for the ISO-8859-15 encoding.
+--
+-- /Note/: For all character encodings other than ISO-8859-15, and
+-- almost all Unicode code points above U+00A3, this predicate gives
+-- /wrong answers/.
+isAlpha_iso8859_15 :: Char -> Bool
+isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+                       (c >= '\166' && moby c)
+  where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247"
+        {-# NOINLINE moby #-}
+{-# INLINE isAlpha_iso8859_15 #-}
+
+-- | A fast alphabetic predicate for the ASCII encoding.
+--
+-- /Note/: For all character encodings other than ASCII, and
+-- almost all Unicode code points above U+007F, this predicate gives
+-- /wrong answers/.
+isAlpha_ascii :: Char -> Bool
+isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+{-# INLINE isAlpha_ascii #-}
+
+-- | Parse a single digit.
 digit :: Parser Char
 digit = satisfy isDigit <?> "digit"
-  where isDigit c = c >= '0' && c <= '9'
 {-# INLINE digit #-}
 
+-- | A fast digit predicate.
+isDigit :: Char -> Bool
+isDigit c = c >= '0' && c <= '9'
+{-# INLINE isDigit #-}
+
+-- | Match any character.
 anyChar :: Parser Char
 anyChar = satisfy $ const True
 {-# INLINE anyChar #-}
 
+-- | Fast predicate for matching a space character.
+--
+-- /Note/: This predicate only gives correct answers for the ASCII
+-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
+-- space) as a space character, even though it is a valid ISO-8859-15
+-- byte.
 isSpace :: Char -> Bool
 isSpace c = c `B.elem` spaces
     where spaces = B.pack " \n\r\t\v\f"
+          {-# NOINLINE spaces #-}
+{-# INLINE isSpace #-}
 
+-- | Parse a space character.
+--
+-- /Note/: This parser only gives correct answers for the ASCII
+-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
+-- space) as a space character, even though it is a valid ISO-8859-15
+-- byte.
 space :: Parser Char
 space = satisfy isSpace <?> "space"
 {-# INLINE space #-}
 char c = satisfy (== c) <?> [c]
 {-# INLINE char #-}
 
--- | Match a specific character.
+-- | Match a specific character, but return its 'Word8' value.
 char8 :: Char -> Parser Word8
 char8 c = I.satisfy (== c2w c) <?> [c]
 {-# INLINE char8 #-}
 
 -- | Match any character in a set.
 --
--- > vowel = inClass "aeiou"
+-- >vowel = inClass "aeiou"
 --
 -- Range notation is supported.
 --
--- > halfAlphabet = inClass "a-nA-N"
+-- >halfAlphabet = inClass "a-nA-N"
 --
 -- To add a literal \'-\' to a set, place it at the beginning or end
 -- of the string.
 notInClass s = not . inClass s
 {-# INLINE notInClass #-}
 
--- | Consume characters while the predicate succeeds.
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'False' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
 takeWhile :: (Char -> Bool) -> Parser B.ByteString
 takeWhile p = I.takeWhile (p . w2c)
 {-# INLINE takeWhile #-}
 
--- | Consume characters while the predicate fails.
+-- | Consume input as long as the predicate returns 'False'
+-- (i.e. until it returns 'True'), and return the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'True' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
 takeTill :: (Char -> Bool) -> Parser B.ByteString
 takeTill p = I.takeTill (p . w2c)
 {-# INLINE takeTill #-}
 
--- | Skip over characters while the predicate succeeds.
+-- | Skip past input for as long as the predicate returns 'True'.
 skipWhile :: (Char -> Bool) -> Parser ()
 skipWhile p = I.skipWhile (p . w2c)
 {-# INLINE skipWhile #-}
 skipSpace = skipWhile isSpace >> return ()
 {-# INLINE skipSpace #-}
 
+-- | A predicate that matches either a carriage return @\'\\r\'@ or
+-- newline @\'\\n\'@ character.
 isEndOfLine :: Word8 -> Bool
 isEndOfLine w = w == 13 || w == 10
 {-# INLINE isEndOfLine #-}
 
+-- | A predicate that matches either a space @\' \'@ or horizontal tab
+-- @\'\\t\'@ character.
 isHorizontalSpace :: Word8 -> Bool
 isHorizontalSpace w = w == 32 || w == 9
 {-# INLINE isHorizontalSpace #-}
 
 {-
-numeric :: String -> (B.ByteString -> Maybe (a,B.ByteString)) -> Parser a
-numeric desc f = do
-  s <- getInput
-  case f s of
-    Nothing -> fail desc
-    Just (i,s') -> setInput s' >> return i
-                   
--- | Parse an integer.  The position counter is not updated.
-int :: Parser Int
-int = numeric "Int" B.readInt
-
--- | Parse an integer.  The position counter is not updated.
-integer :: Parser Integer
-integer = numeric "Integer" B.readInteger
-
 -- | Parse a Double.  The position counter is not updated.
 double :: Parser Double
 double = numeric "Double" readDouble
 -}
 
-hexNumber :: Integral a => Parser a
-{-# SPECIALISE hexNumber :: Parser Int #-}
-hexNumber = fromHex `fmap` I.takeWhile1 isHexDigit
+-- | Parse and decode an unsigned hexadecimal number.  The hex digits
+-- @\'a\'@ through @\'f\'@ may be upper or lower case.
+--
+-- This parser does not accept a leading @\"0x\"@ string.
+hexadecimal :: Integral a => Parser a
+{-# SPECIALISE hexadecimal :: Parser Int #-}
+hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit
   where isHexDigit w = (w >= 48 && w <= 57) || (x >= 97 && x <= 102)
             where x = toLower w
-        fromHex = B8.foldl' step 0
         step a w | w >= 48 && w <= 57  = a * 16 + fromIntegral (w - 48)
-                 | x >= 97 && x <= 102 = a * 16 + fromIntegral (x - 87)
-                 | otherwise           = error "impossible"
+                 | otherwise           = a * 16 + fromIntegral (x - 87)
             where x = toLower w
+
+-- | Parse and decode an unsigned decimal number.
+decimal :: Integral a => Parser a
+{-# SPECIALISE decimal :: Parser Int #-}
+decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig
+  where isDig w  = w >= 48 && w <= 57
+        step a w = a * 10 + fromIntegral (w - 48)
+
+-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign
+-- character.
+--
+-- A leading @\'-\'@ negates the value produced by the supplied
+-- parser; a leading @\'+\'@, or no sign at all, leaves it unchanged.
+--
+-- >signed decimal
+signed :: Num a => Parser a -> Parser a
+{-# SPECIALISE signed :: Parser Int -> Parser Int #-}
+-- '<$>' and '*>' are both @infixl 4@, so the sign-then-number
+-- sequence must be parenthesised; otherwise 'negate' would be applied
+-- to the sign byte and then discarded, leaving the value un-negated.
+signed p = (negate <$> (char8 '-' *> p))
+       <|> (char8 '+' *> p)
+       <|> p

Data/Attoparsec/Combinator.hs

 {-# LANGUAGE BangPatterns, CPP #-}
------------------------------------------------------------------------------
 -- |
 -- Module      :  Data.Attoparsec.Combinator
--- Copyright   :  Daan Leijen 1999-2001, Bryan O'Sullivan 2009
+-- Copyright   :  Daan Leijen 1999-2001, Bryan O'Sullivan 2009-2010
 -- License     :  BSD3
 -- 
 -- Maintainer  :  bos@serpentine.com
 -- Portability :  portable
 --
 -- Useful parser combinators, similar to those provided by Parsec.
--- 
------------------------------------------------------------------------------
 module Data.Attoparsec.Combinator
     (
       choice
     , skipMany
     , skipMany1
     , eitherP
+
     -- * Inlined implementations of existing functions
+    --
+    -- These are exact duplicates of functions already exported by the
+    -- 'Control.Applicative' module, but whose definitions are
+    -- inlined.  In many cases, this leads to 2x performance
+    -- improvements.
     , many
     ) where
 
 eitherP a b = (Left <$> a) <|> (Right <$> b)
 {-# INLINE eitherP #-}
 
+-- | Zero or more.
 many :: (Alternative f) => f a -> f [a]
 many v = many_v
     where many_v = some_v <|> pure []

Data/Attoparsec/Internal.hs

 
     -- * Running parsers
     , parse
-    , parseAll
-    , feed
 
     -- * Combinators
     , (<?>)
     , string
     , stringTransform
     , take
-    , takeTill
     , takeWhile
     , takeWhile1
+    , takeTill
 
     -- * State observation and manipulation functions
     , endOfInput
     where msg = "Failed reading: " ++ err
 {-# INLINE failDesc #-}
 
+-- | Succeed only if at least @n@ bytes of input are available.
 ensure :: Int -> Parser ()
 ensure n = Parser $ \st0@(S s0 _a0 _c0) kf ks ->
     if B.length s0 >= n
     then ks st0 ()
     else runParser (demandInput >> ensure n) st0 kf ks
 
+-- | Immediately demand more input via a 'Partial' continuation
+-- result.
 demandInput :: Parser ()
 demandInput = Parser $ \st0@(S s0 a0 c0) kf ks ->
     if c0 == Complete
          else let st1 = S (s0 +++ s) (a0 +++ s) Incomplete
               in  ks st1 ()
 
+-- | This parser always succeeds.  It returns 'True' if any input is
+-- available either immediately or on demand, and 'False' if the end
+-- of all input has been reached.
 wantInput :: Parser Bool
 wantInput = Parser $ \st0@(S s0 a0 c0) _kf ks ->
   case undefined of
 put :: B.ByteString -> Parser ()
 put s = Parser (\(S _s0 a0 c0) _kf ks -> ks (S s a0 c0) ())
 
-take :: Int -> Parser B.ByteString
-take n = takeWith n (const True)
-{-# INLINE take #-}
-
 (+++) :: B.ByteString -> B.ByteString -> B.ByteString
 (+++) = B.append
 {-# INLINE (+++) #-}
 
+-- | Attempt a parse, and if it fails, rewind the input so that no
+-- input appears to have been consumed.
+--
+-- This combinator is useful in cases where a parser might consume
+-- some input before failing, i.e. the parser needs arbitrary
+-- lookahead.  The downside to using this combinator is that it can
+-- retain input for longer than is desirable.
 try :: Parser a -> Parser a
 try p = Parser $ \st0 kf ks ->
         runParser p (noAdds st0) (kf . mappend st0) ks
 
+-- | The parser @satisfy p@ succeeds for any byte for which the
+-- predicate @p@ returns 'True'. Returns the byte that is actually
+-- parsed.
+--
+-- >digit = satisfy isDigit
+-- >    where isDigit w = w >= 48 && w <= 57
 satisfy :: (Word8 -> Bool) -> Parser Word8
 satisfy p = do
   ensure 1
     then put (B.unsafeTail s) >> return w
     else fail "satisfy"
 
--- | Character parser.
+-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if
+-- the predicate @p@ returns 'True' on the transformed value. The
+-- parser returns the transformed byte that was parsed.
 satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a
 satisfyWith f p = do
   ensure 1
     (fp,o,_) <- B.toForeignPtr `fmapP` take (sizeOf dummy)
     return . B.inlinePerformIO . withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
 
+-- | Consume @n@ bytes of input, but succeed only if the predicate
+-- returns 'True'.
 takeWith :: Int -> (B.ByteString -> Bool) -> Parser B.ByteString
 takeWith n p = do
   ensure n
     then put t >> return h
     else failDesc "takeWith"
 
+-- | Consume exactly @n@ bytes of input.
+take :: Int -> Parser B.ByteString
+take n = takeWith n (const True)
+{-# INLINE take #-}
+
+-- | @string s@ parses a sequence of bytes that identically match
+-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
+-- input if it fails (even if a partial match).
+--
+-- /Note/: The behaviour of this parser is different to that of the
+-- similarly-named parser in Parsec, as this one is all-or-nothing.
+-- To illustrate the difference, the following parser will fail under
+-- Parsec given an input of @"for"@:
+--
+-- >string "foo" <|> string "for"
+--
+-- The reason for its failure is that the first branch is a
+-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
+-- before failing.  In Attoparsec, the above parser will /succeed/ on
+-- that input, because the failed first branch will consume nothing.
 string :: B.ByteString -> Parser B.ByteString
 string s = takeWith (B.length s) (==s)
 {-# INLINE string #-}
 stringTransform f s = takeWith (B.length s) ((==s) . f)
 {-# INLINE stringTransform #-}
 
+-- | Skip past input for as long as the predicate returns 'True'.
 skipWhile :: (Word8 -> Bool) -> Parser ()
 skipWhile p = go
  where
       put t
       when (B.null t) go
 
+-- | Consume input as long as the predicate returns 'False'
+-- (i.e. until it returns 'True'), and return the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'True' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
 takeTill :: (Word8 -> Bool) -> Parser B.ByteString
 takeTill p = takeWhile (not . p)
 {-# INLINE takeTill #-}
 
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'False' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
 takeWhile :: (Word8 -> Bool) -> Parser B.ByteString
 takeWhile p = go
  where
           else return h
       else return B.empty
 
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser requires the predicate to succeed on at least one byte
+-- of input: it will fail if the predicate never returns 'True' or if
+-- there is no input left.
 takeWhile1 :: (Word8 -> Bool) -> Parser B.ByteString
 takeWhile1 p = do
   (`when` demandInput) =<< B.null <$> get
     then (h+++) `fmapP` takeWhile p
     else return h
 
--- | Match any character in a set.
+-- | Match any byte in a set.
 --
--- > vowel = inClass "aeiou"
+-- >vowel = inClass "aeiou"
 --
 -- Range notation is supported.
 --
--- > halfAlphabet = inClass "a-nA-N"
+-- >halfAlphabet = inClass "a-nA-N"
 --
--- To add a literal \'-\' to a set, place it at the beginning or end
+-- To add a literal @\'-\'@ to a set, place it at the beginning or end
 -- of the string.
 inClass :: String -> Word8 -> Bool
 inClass s = (`memberWord8` mySet)
     where mySet = charClass s
 {-# INLINE inClass #-}
 
--- | Match any character not in a set.
+-- | Match any byte not in a set.
 notInClass :: String -> Word8 -> Bool
 notInClass s = not . inClass s
 {-# INLINE notInClass #-}
 notWord8 c = satisfy (/= c) <?> "not " ++ show c
 {-# INLINE notWord8 #-}
 
+-- | Match only if all input has been consumed.
 endOfInput :: Parser ()
 endOfInput = Parser $ \st0@S{..} kf ks ->
              if B.null input
                        in  runParser demandInput st0 kf' ks'
              else kf st0 [] "endOfInput"
                                                
+-- | Match either a single newline character @\'\\n\'@, or a carriage
+-- return followed by a newline character @\"\\r\\n\"@.
 endOfLine :: Parser ()
 endOfLine = (word8 10 >> return ()) <|> (string (B.pack "\r\n") >> return ())
 
 {-# INLINE (<?>) #-}
 infix 0 <?>
 
+-- | Terminal failure continuation.
 failK :: Failure a
 failK st0 stack msg = Fail st0 stack msg
 
+-- | Terminal success continuation.
 successK :: Success a a
 successK state a = Done state a
 
+-- | Run a parser.
 parse :: Parser a -> B.ByteString -> Result a
 parse m s = runParser m (S s B.empty Incomplete) failK successK
 {-# INLINE parse #-}
-              
-feed :: Result r -> B.ByteString -> Result r
-feed f@(Fail _ _ _) _ = f
-feed (Partial k) d = k d
-feed (Done (S s a c) r) d = Done (S (s +++ d) a c) r
-
-parseAll :: Parser a -> [B.ByteString] -> Result a
-parseAll p ss = case ss of
-                  []     -> go (parse p B.empty) []
-                  (c:cs) -> go (parse p c) cs
-  where go (Partial k) (c:cs) = go (k c) cs
-        go (Partial k) []     = k B.empty
-        go r           _      = r
 author:          Bryan O'Sullivan <bos@serpentine.com>
 maintainer:      Bryan O'Sullivan <bos@serpentine.com>
 stability:       experimental
-tested-with:     GHC == 6.8.3, GHC == 6.10.1
-synopsis:        Fast bytestring combinator parsing
-description:     Fast bytestring combinator parsing
+tested-with:     GHC == 6.10.4, GHC == 6.12.1
+synopsis:        Fast combinator parsing for bytestrings
 cabal-version:   >= 1.2
 build-type:      Simple
-description:     Fast, flexible text-oriented parsing of lazy ByteStrings.
+description:
+    A fast parser combinator library, aimed particularly at dealing
+    efficiently with network protocols and complicated text/binary
+    file formats.
 
 flag split-base
 flag applicative-in-base
     -- bytestring was in base-2.0 and 2.1.1
     build-depends: base >= 2.0 && < 2.2
   else
-    -- in base 1.0 and 3.0 bytestring is a separate package
+    -- in base 1.0 and >= 3.0 bytestring is a separate package
     build-depends: base < 2.0 || >= 3, bytestring >= 0.9, containers >= 0.1.0.1
 
   if flag(applicative-in-base)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.