Commits

Bryan O'Sullivan committed 2ce9eb0

Add documentation, tidy up code

  • Participants
  • Parent commits baaa244

Comments (0)

Files changed (11)

File Data/Attoparsec.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
+-- Simple, efficient parser combinators for lazy 'ByteString'
 -- strings, loosely based on 'Text.ParserCombinators.Parsec'.
 -- 
 -----------------------------------------------------------------------------
 module Data.Attoparsec
     (
-    -- * Parser
+    -- * Parser types
       ParseError
     , Parser
 
 
     -- * Combinators
     , (<?>)
+    , try
+    , module Data.Attoparsec.Combinator
 
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , try
-    , endOfInput
-    , lookAhead
-    , peek
+    -- * Parsing individual bytes
+    , anyWord8
+    , notWord8
+    , word8
+    , satisfy
 
-    -- * Things like in @Parsec.Char@
-    , satisfy
-    , anyWord8
-    , word8
-    , notWord8
+    -- * Efficient string handling
     , string
+    , skipWhile
     , stringTransform
-
-    -- * Parser converters.
-    , eitherP
-
-    -- * Miscellaneous functions.
-    , getInput
-    , getConsumed
+    , takeAll
+    , takeTill
     , takeWhile
     , takeWhile1
-    , takeTill
-    , takeAll
-    , skipWhile
+
+    -- ** Combinators
+    , match
     , notEmpty
-    , match
 
-    , module Data.Attoparsec.Combinator
+    -- * State observation functions
+    , endOfInput
+    , getConsumed
+    , getInput
+    , lookAhead
     ) where
 
 import Data.Attoparsec.Combinator

File Data/Attoparsec/Char8.hs

 -----------------------------------------------------------------------------
 -- |
 -- Module      :  Data.Attoparsec.Char8
--- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
+-- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2009
 -- License     :  BSD3
 -- 
 -- Maintainer  :  bos@serpentine.com
 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+-- Simple, efficient, character-oriented parser combinators for lazy
+-- 'LB.ByteString' strings, loosely based on the Parsec library.
 -- 
+-- /Note/: This module is intended for parsing text that is
+-- represented using an 8-bit character set, e.g. ASCII or
+-- ISO-8859-15.  It /does not/ deal with character encodings,
+-- multibyte characters, or wide characters.  Any attempts to use
+-- characters above code point 255 will give wrong answers.
 -----------------------------------------------------------------------------
 module Data.Attoparsec.Char8
     (
-    -- * Parser
+    -- * Parser types
       ParseError
     , Parser
 
 
     -- * Combinators
     , (<?>)
+    , try
 
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , try
-    , endOfInput
-    , lookAhead
-    , peek
+    -- * Parsing individual characters
+    , anyChar
+    , char
+    , digit
+    , letter
+    , notChar
+    , space
+    , satisfy
 
-    -- * Things like in @Parsec.Char@
-    , satisfy
-    , letter
-    , digit
-    , anyChar
-    , space
-    , char
-    , notChar
+    -- ** Character classes
+    , inClass
+    , notInClass
+
+    -- * Efficient string handling
     , string
     , stringCI
+    , skipSpace
+    , skipWhile
+    , takeAll
+    , takeCount
+    , takeTill
+    , takeWhile
+    , takeWhile1
 
-    -- * Parser converters.
-    , eitherP
+    -- ** Combinators
+    , match
+    , notEmpty
 
-    -- * Numeric parsers.
+    -- * Text parsing
+    , endOfLine
+
+    -- * Numeric parsers
     , int
     , integer
     , double
 
-    -- * Miscellaneous functions.
+    -- * State observation functions
+    , endOfInput
+    , getConsumed
     , getInput
-    , getConsumed
-    , takeWhile
-    , takeWhile1
-    , takeTill
-    , takeAll
-    , takeCount
-    , skipWhile
-    , skipSpace
-    , notEmpty
-    , match
-    , inClass
-    , notInClass
-    , endOfLine
+    , lookAhead
 
+    -- * Combinators
     , module Data.Attoparsec.Combinator
     ) where
 
 import Data.ByteString.Internal (w2c)
 import Data.Char (isDigit, isLetter, isSpace, toLower)
 import Data.Attoparsec.FastSet
-    (FastSet, memberChar, set)
+    (FastSet, charClass, memberChar, set)
 import qualified Data.Attoparsec.Internal as I
 import Data.Attoparsec.Combinator
 import Data.Attoparsec.Internal
     (Parser, ParseError, (<?>), parse, parseAt, parseTest, try, endOfInput,
-     lookAhead, peek, string,
-     eitherP, getInput, getConsumed, takeAll, takeCount, notEmpty, match,
+     lookAhead, string,
+     getInput, getConsumed, takeAll, takeCount, notEmpty, match,
      endOfLine, setInput)
 import Data.ByteString.Lex.Lazy.Double (readDouble)
 import Prelude hiding (takeWhile)

File Data/Attoparsec/Char8Boilerplate.h

 space = satisfy isSpace <?> "space"
 {-# INLINE space #-}
 
--- | Satisfy a specific character.
+-- | Match a specific character.
 char :: Char -> PARSER Char
 char c = satisfy (== c) <?> [c]
 {-# INLINE char #-}
 
--- | Satisfy a specific character.
+-- | Match any character except the given one.
 notChar :: Char -> PARSER Char
 notChar c = satisfy (/= c) <?> "not " ++ [c]
 {-# INLINE notChar #-}
 
-charClass :: String -> FastSet
-charClass = set . SB.pack . go
-    where go (a:'-':b:xs) = [a..b] ++ go xs
-          go (x:xs) = x : go xs
-          go _ = ""
-
+-- | Match any character in a set.
+--
+-- > vowel = inClass "aeiou"
+--
+-- Range notation is supported.
+--
+-- > halfAlphabet = inClass "a-nA-N"
+--
+-- To add a literal \'-\' to a set, place it at the beginning or end
+-- of the string.
 inClass :: String -> Char -> Bool
-inClass s = (`memberChar` myset)
-    where myset = charClass s
+inClass s = (`memberChar` mySet)
+    where mySet = charClass s
 {-# INLINE inClass #-}
 
+-- | Match any character not in a set.
 notInClass :: String -> Char -> Bool
 notInClass s = not . inClass s
 {-# INLINE notInClass #-}
 
--- | Consume characters while the predicate is true.
+-- | Consume characters while the predicate succeeds.
 takeWhile :: (Char -> Bool) -> PARSER LB.ByteString
 takeWhile p = I.takeWhile (p . w2c)
 {-# INLINE takeWhile #-}
 
+-- | Consume characters while the predicate fails.
 takeTill :: (Char -> Bool) -> PARSER LB.ByteString
 takeTill p = I.takeTill (p . w2c)
 {-# INLINE takeTill #-}
 
--- | Skip over characters while the predicate is true.
+-- | Skip over characters while the predicate succeeds.
 skipWhile :: (Char -> Bool) -> PARSER ()
 skipWhile p = I.skipWhile (p . w2c)
 {-# INLINE skipWhile #-}

File Data/Attoparsec/Combinator.hs

 -- Stability   :  experimental
 -- Portability :  portable
 --
--- Useful parser combinators, similar to Parsec.
+-- Useful parser combinators, similar to those provided by Parsec.
 -- 
 -----------------------------------------------------------------------------
 module Data.Attoparsec.Combinator
     , sepBy1
     , skipMany
     , skipMany1
+    , eitherP
     , module Control.Applicative
     ) where
 
 import Control.Applicative
 
--- | @choice ps@ tries to apply the parsers in the list @ps@ in order,
+-- | @choice ps@ tries to apply the actions in the list @ps@ in order,
 -- until one of them succeeds. Returns the value of the succeeding
--- parser.
+-- action.
 choice :: Alternative f => [f a] -> f a
 choice = foldr (<|>) empty
 
--- | @option x p@ tries to apply parser @p@. If @p@ fails without
+-- | @option x p@ tries to apply action @p@. If @p@ fails without
 -- consuming input, it returns the value @x@, otherwise the value
 -- returned by @p@.
 --
 option :: Alternative f => a -> f a -> f a
 option x p = p <|> pure x
 
--- | @many1 p@ applies the parser @p@ /one/ or more times. Returns a
+-- | @many1 p@ applies the action @p@ /one/ or more times. Returns a
 -- list of the returned values of @p@.
 --
 -- >  word  = many1 letter
 many1 :: Alternative f => f a -> f [a]
 many1 p = liftA2 (:) p (many p)
 
--- | @sepBy p sep@ parses /zero/ or more occurrences of @p@, separated
--- by @sep@. Returns a list of values returned by @p@.
+-- | @sepBy p sep@ applies /zero/ or more occurrences of @p@, separated
+-- by @sep@. Returns a list of the values returned by @p@.
 --
 -- > commaSep p  = p `sepBy` (symbol ",")
 sepBy :: Alternative f => f a -> f s -> f [a]
 sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure []
 
--- | @sepBy1 p sep@ parses /one/ or more occurrences of @p@, separated
--- by @sep@. Returns a list of values returned by @p@.
+-- | @sepBy1 p sep@ applies /one/ or more occurrences of @p@, separated
+-- by @sep@. Returns a list of the values returned by @p@.
 --
 -- > commaSep p  = p `sepBy1` (symbol ",")
 sepBy1 :: Alternative f => f a -> f s -> f [a]
 sepBy1 p s = scan
     where scan = liftA2 (:) p ((s *> scan) <|> pure [])
 
--- | @manyTill p end@ applies parser @p@ /zero/ or more times until
--- parser @end@ succeeds. Returns the list of values returned by @p@.
--- This parser can be used to scan comments:
+-- | @manyTill p end@ applies action @p@ /zero/ or more times until
+-- action @end@ succeeds, and returns the list of values returned by
+-- @p@.  This can be used to scan comments:
 --
 -- >  simpleComment   = string "<!--" *> manyTill anyChar (try (string "-->"))
 --
 manyTill p end = scan
     where scan = (end *> pure []) <|> liftA2 (:) p scan
 
--- | Skip zero or more instances of the parser.
+-- | Skip zero or more instances of an action.
 skipMany :: Alternative f => f a -> f ()
 skipMany p = scan
     where scan = (p *> scan) <|> pure ()
 
--- | Skip one or more instances of the parser.
+-- | Skip one or more instances of an action.
 skipMany1 :: Alternative f => f a -> f ()
 skipMany1 p = p *> skipMany p
 
--- | Apply the given parser repeatedly, returning every parse result.
+-- | Apply the given action repeatedly, returning every result.
 count :: Monad m => Int -> m a -> m [a]
 count n p = sequence (replicate n p)
 {-# INLINE count #-}
+
+-- | Combine two alternatives.
+eitherP :: (Alternative f) => f a -> f b -> f (Either a b)
+eitherP a b = (Left <$> a) <|> (Right <$> b)
+{-# INLINE eitherP #-}

File Data/Attoparsec/FastSet.hs

     , memberWord8
     -- * Debugging
     , fromSet
+    -- * Handy interface
+    , charClass
     ) where
 
 import Data.Bits ((.&.), (.|.), shiftL, shiftR)
                     pokeByteOff t byte (prev .|. bit)
                     loop (n + 1)
               in loop 0
+
+charClass :: String -> FastSet
+charClass = set . B8.pack . go
+    where go (a:'-':b:xs) = [a..b] ++ go xs
+          go (x:xs) = x : go xs
+          go _ = ""

File Data/Attoparsec/Incremental.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+-- Simple, efficient, and incremental parser combinators for lazy
+-- 'L.ByteString' strings, loosely based on the Parsec library.
 --
 -- This module is heavily influenced by Adam Langley's incremental
--- parser in his binary-strict package.
+-- parser in his @binary-strict@ package.
 -- 
 -----------------------------------------------------------------------------
 module Data.Attoparsec.Incremental
     (
+    -- * Parser types
       Parser
     , Result(..)
+
+    -- * Running parsers
     , parse
     , parseWith
     , parseTest
 
+    -- * Combinators
     , (<?>)
     , try
-    , takeWhile
-    , takeTill
-    , takeCount
-    , string
-    , satisfy
-    , endOfInput
-    , pushBack
 
+    -- * Parsing individual bytes
     , word8
     , notWord8
     , anyWord8
+    , satisfy
 
+    -- * Efficient string handling
+    , string
     , skipWhile
+    , takeCount
+    , takeTill
+    , takeWhile
 
+    -- * State observation and manipulation functions
+    , endOfInput
+    , pushBack
     , yield
 
+    -- * Combinators
     , module Data.Attoparsec.Combinator
     ) where
 
 import qualified Data.ByteString.Lazy.Internal as L
 import Prelude hiding (takeWhile)
 
-data S = S {-# UNPACK #-} !S.ByteString -- ^ first chunk of input
-           L.ByteString                 -- ^ rest of input
-           [L.ByteString]               -- ^ input acquired during backtracks
-           !Bool                        -- ^ have we hit EOF yet?
-           {-# UNPACK #-} !Int          -- ^ failure depth
+data S = S {-# UNPACK #-} !S.ByteString -- first chunk of input
+           L.ByteString                 -- rest of input
+           [L.ByteString]               -- input acquired during backtracks
+           !Bool                        -- have we hit EOF yet?
+           {-# UNPACK #-} !Int          -- failure depth
 
--- | The result of a partial parse
+-- | The result of a partial parse.
 data Result a = Failed String
-                -- ^ the parse failed with the given error message
+                -- ^ The parse failed, with the given error message.
               | Done L.ByteString a
-                -- ^ the parse finished and produced the given list of
-                --   results doing so. Any unparsed data is returned.
+                -- ^ The parse succeeded, producing the given
+                -- result. The 'L.ByteString' contains any unconsumed
+                -- input.
               | Partial (L.ByteString -> Result a)
-                -- ^ the parse ran out of data before finishing, but produced
-                --   the given list of results before doing so. To continue the
-                --   parse pass more data to the given continuation
+                -- ^ The parse ran out of data before finishing. To
+                -- resume the parse, pass more data to the given
+                -- continuation.
 
 instance (Show a) => Show (Result a) where
   show (Failed err)      = "Failed " ++ show err
   show (Done rest rs)    = "Done (" ++ show rest ++ ") " ++ show rs
   show (Partial _)       = "Partial"
 
--- | This is the internal version of the above. This is the type which is
---   actually used by the code, as it has the extra information needed
---   for backtracking. This is converted to an external friendly @Result@
---   type just before giving it to the outside world.
+-- | This is the internal version of the above. This is the type which
+-- is actually used by the code, as it has the extra information
+-- needed for backtracking. This is converted to a friendly 'Result'
+-- type just before giving it to the outside world.
 data IResult a = IFailed S String
                | IDone S a
                | IPartial (L.ByteString -> IResult a)
   show (IDone _ _)     = "IDone"
   show (IPartial _)    = "IPartial"
 
+-- | The parser type.
 newtype Parser r a = Parser {
       unParser :: S -> (a -> S -> IResult r) -> IResult r
     }
     in
       filt $ unParser p1 (S sb lb [] eof (failDepth + 1)) (cutContinuation k)
 
+-- | This is a no-op combinator for compatibility.
 try :: Parser r a -> Parser r a
 try p = p
 
 
 infix 0 <?>
 
--- | Name the parser.
-(<?>) :: Parser r a -> String -> Parser r a
+-- | Name the parser, in case failure occurs.
+(<?>) :: Parser r a
+      -> String                 -- ^ the name to use if parsing fails
+      -> Parser r a
 {-# INLINE (<?>) #-}
 p <?> msg =
   Parser $ \st k ->
 addX s adds | L.null s = adds
             | otherwise = s : adds
 
+-- | Resume our caller, handing back a 'Partial' result. This function
+-- is probably not useful, but is provided for completeness.
 yield :: Parser r ()
 yield = Parser $ \(S sb lb adds eof failDepth) k ->
   IPartial $ \s -> k () (S sb (lb `appL` s) (addX s adds) eof failDepth)
      then continue (k left) (takeWith splitf) (k . appL left) st
      else k left (mkState rest adds eof failDepth)
     
+-- | Consume bytes while the predicate succeeds.
 takeWhile :: (Word8 -> Bool) -> Parser r L.ByteString
 takeWhile = takeWith . L.span
 
+-- | Consume bytes while the predicate fails.  If the predicate never
+-- succeeds, the entire input string is returned.
 takeTill :: (Word8 -> Bool) -> Parser r L.ByteString
 takeTill = takeWith . L.break
 
+-- | Return exactly the given number of bytes.  If not enough are
+-- available, fail.
 takeCount :: Int -> Parser r L.ByteString
 takeCount = tc . fromIntegral where
  tc n = Parser $ \st@(S sb lb adds eof failDepth) k ->
            else continue (`IFailed` "takeCount: EOF")
                          (tc (n - l)) (k . appL h) st
 
+-- | Match a literal string exactly.
 string :: L.ByteString -> Parser r L.ByteString
 string s =
   Parser $ \st@(S sb lb adds eof failDepth) k ->
     | L.null s  = S S.empty L.empty [] True failDepth
     | otherwise = mkState s (addX s adds) False failDepth
 
+-- | Match a single byte based on the given predicate.
 satisfy :: (Word8 -> Bool) -> Parser r Word8
 satisfy p =
   Parser $ \st@(S sb lb adds eof failDepth) k ->
                    Nothing -> continue (`IFailed` "satisfy: EOF")
                                        (satisfy p) k st
 
+-- | Force the given string to appear next in the input stream.
 pushBack :: L.ByteString -> Parser r ()
 pushBack bs =
     Parser $ \(S sb lb adds eof failDepth) k ->
         k () (mkState (bs `appL` (sb +: lb)) adds eof failDepth)
 
+-- | Succeed if we have reached the end of the input string.
 endOfInput :: Parser r ()
 endOfInput = Parser $ \st@(S sb lb _adds _eof _failDepth) k ->
              if not (S.null sb) || not (L.null lb)
 terminalContinuation :: a -> S -> IResult a
 terminalContinuation v s = IDone s v
 
+-- | Run a parser.
 parse :: Parser r r -> L.ByteString -> Result r
 parse m input =
   toplevelTranslate $ unParser m (initState input) terminalContinuation
 
-parseWith :: Applicative f => f L.ByteString -> Parser r r -> L.ByteString
+-- | Run a parser, using the given function to resupply it with input.
+--
+-- Here's an example that shows how to parse data from a socket, using
+-- Johan Tibell's @network-bytestring@ package.
+--
+-- >  import qualified Data.ByteString.Lazy as L
+-- >  import Data.Attoparsec.Incremental (Parser, Result, parseWith)
+-- >  import Network.Socket.ByteString.Lazy (recv_)
+-- >  import Network.Socket (Socket)
+-- >
+-- >  netParse :: Parser r r -> Socket -> IO (Result r)
+-- >  netParse p sock = parseWith (recv_ sock 65536) p L.empty
+parseWith :: Applicative f => f L.ByteString -- ^ resupply parser with input
+          -> Parser r r                      -- ^ parser to run
+          -> L.ByteString                    -- ^ initial input
           -> f (Result r)
 parseWith refill p s =
   case parse p s of
     Partial k -> k <$> refill
     ok        -> pure ok
 
+-- | Try out a parser, and print its result.
 parseTest :: (Show r) => Parser r r -> L.ByteString -> IO ()
 parseTest p s = print (parse p s)
 

File Data/Attoparsec/Incremental/Char8.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+-- Simple, efficient, character-oriented, and incremental parser
+-- combinators for lazy 'L.ByteString' strings, loosely based on the
+-- Parsec library.
 -- 
+-- /Note/: This module is intended for parsing text that is
+-- represented using an 8-bit character set, e.g. ASCII or
+-- ISO-8859-15.  It /does not/ deal with character encodings,
+-- multibyte characters, or wide characters.  Any attempts to use
+-- characters above code point 255 will give wrong answers.
 -----------------------------------------------------------------------------
 module Data.Attoparsec.Incremental.Char8
     (
-    -- * Parser
+    -- * Parser types
       Parser
     , Result(..)
 
     -- * Running parsers
     , parse
+    , parseWith
+    , parseTest
 
     -- * Combinators
     , (<?>)
+    , try
 
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , pushBack
-
-    -- * Things like in @Parsec.Char@
+    -- * Parsing individual characters
     , satisfy
     , letter
     , digit
     , space
     , char
     , notChar
+
+    -- ** Character classes
+    , inClass
+    , notInClass
+
+    -- * Efficient string handling
     , string
+    , skipSpace
+    , skipWhile
+    , takeCount
+    , takeTill
+    , takeWhile
 
-    -- * Numeric parsers.
+    -- * Text parsing
+    , endOfLine
+
+    -- * Numeric parsers
     , int
     , integer
     , double
 
-    -- * Miscellaneous functions.
-    , takeWhile
-    , takeTill
-    , takeCount
-    , skipWhile
-    , skipSpace
-    , inClass
-    , notInClass
+    -- * State observation and manipulation functions
+    , endOfInput
+    , pushBack
+    , yield
 
+    -- * Combinators
     , module Data.Attoparsec.Combinator
     ) where
 
-import qualified Data.ByteString.Char8 as SB
 import qualified Data.ByteString.Lazy.Char8 as LB
 import Data.ByteString.Internal (w2c)
 import Data.Char (isDigit, isLetter, isSpace)
-import Data.Attoparsec.FastSet
-    (FastSet, memberChar, set)
+import Data.Attoparsec.FastSet (charClass, memberChar)
 import qualified Data.Attoparsec.Incremental as I
 import Data.Attoparsec.Incremental
-    (Parser, Result(..), (<?>), parse, pushBack,
-     string, takeCount)
+    (Parser, Result(..), (<?>), endOfInput, parse, parseWith, parseTest,
+     pushBack, string, takeCount, try, yield)
 import Data.ByteString.Lex.Lazy.Double (readDouble)
 import Prelude hiding (takeWhile)
 import Data.Attoparsec.Combinator
 isIntegral :: Char -> Bool
 isIntegral c = isDigit c || c == '-'
 
--- | Parse an integer.  The position counter is not updated.
+-- | Parse an 'Int'.
 int :: Parser r Int
 int = numeric "Int" isIntegral LB.readInt
 
--- | Parse an integer.  The position counter is not updated.
+-- | Parse an 'Integer'.
 integer :: Parser r Integer
 integer = numeric "Integer" isIntegral LB.readInteger
 
--- | Parse a Double.  The position counter is not updated.
+-- | Parse a 'Double'.
 double :: Parser r Double
 double = numeric "Double" isDouble readDouble
     where isDouble c = isIntegral c || c == 'e' || c == '+'
 
+-- | Match the end of a line.  This may be either a newline character,
+-- or a carriage return followed by a newline.
+endOfLine :: Parser r ()
+endOfLine = (char '\n' *> pure ()) <|> (string crlf *> pure ())
+    where crlf = LB.pack "\r\n"
+
 #define PARSER Parser r
 #include "../Char8Boilerplate.h"

File Data/Attoparsec/Internal.hs

 -----------------------------------------------------------------------------
 module Data.Attoparsec.Internal
     (
-    -- * Parser
+    -- * Parser types
       ParseError
     , Parser
 
 
     -- * Combinators
     , (<?>)
+    , try
 
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , try
-    , endOfInput
-    , lookAhead
-    , peek
-
-    -- * Things like in @Parsec.Char@
+    -- * Parsing individual bytes
     , satisfy
     , anyWord8
     , word8
     , notWord8
+
+    -- * Efficient string handling
+    , match
+    , notEmpty
+    , skipWhile
     , string
     , stringTransform
-
-    -- * Parser converters.
-    , eitherP
-
-    -- * Miscellaneous functions.
-    , getInput
-    , getConsumed
-    , setInput
+    , takeAll
+    , takeCount
+    , takeTill
     , takeWhile
     , takeWhile1
-    , takeTill
-    , takeAll
-    , takeCount
-    , skipWhile
-    , notEmpty
-    , match
+
+    -- * State observation functions
+    , endOfInput
+    , getConsumed
+    , getInput
+    , lookAhead
+    , setInput
+
+    -- * Utilities
     , endOfLine
-
-    -- * Utilities.
     , (+:)
     ) where
 
 import Control.Applicative
+    (Alternative(..), Applicative(..), (*>))
 import Control.Monad (MonadPlus(..), ap)
 import Control.Monad.Fix (MonadFix(..))
 import qualified Data.ByteString as SB
 import Data.Word (Word8)
 import Prelude hiding (takeWhile)
 
+-- | A description of a parsing error.
 type ParseError = String
 
 -- State invariants:
            LB.ByteString
            {-# UNPACK #-} !Int64
 
+-- | A parser that produces a result of type @a@.
 newtype Parser a = Parser {
       unParser :: S -> Either (LB.ByteString, [String]) (a, S)
     }
 
 infix 0 <?>
 
--- | Name the parser.
-(<?>) :: Parser a -> String -> Parser a
+-- | Name the parser, in case failure occurs.
+(<?>) :: Parser a
+      -> String                 -- ^ the name to use if parsing fails
+      -> Parser a
 p <?> msg =
     Parser $ \s@(S sb lb _) ->
         case unParser p s of
 getConsumed :: Parser Int64
 getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s)
 
--- | Character parser.
+-- | Match a single byte based on the given predicate.
 satisfy :: (Word8 -> Bool) -> Parser Word8
 satisfy p =
     Parser $ \s@(S sb lb n) ->
              Nothing -> unParser (nextChunk >> satisfy p) s
 {-# INLINE satisfy #-}
 
--- | Satisfy a literal string.
+-- | Match a literal string exactly.
 string :: LB.ByteString -> Parser LB.ByteString
 string s = Parser $ \(S sb lb n) ->
            let bs = sb +: lb
               else Left (bs, [])
 {-# INLINE string #-}
 
+-- | Match the end of a line.  This may be any of a newline character,
+-- a carriage return character, or a carriage return followed by a newline.
 endOfLine :: Parser ()
 endOfLine = Parser $ \(S sb lb n) ->
             let bs = sb +: lb
                                 else Right ((), mkState (LB.tail bs) (n + 1))
                      _ -> Left (bs, ["EOL"])
 
--- | Satisfy a literal string, after applying a transformation to both
--- it and the matching text.
+-- | Match a literal string, after applying a transformation to both
+-- it and the matching text.  Useful for e.g. case insensitive string
+-- comparison.
 stringTransform :: (LB.ByteString -> LB.ByteString) -> LB.ByteString
                 -> Parser LB.ByteString
 stringTransform f s = Parser $ \(S sb lb n) ->
     where fs = f s
 {-# INLINE stringTransform #-}
 
+-- | Attempt a parse, but do not consume any input if the parse fails.
 try :: Parser a -> Parser a
 try p = Parser $ \s@(S sb lb _) ->
         case unParser p s of
           Left (_, msgs) -> Left (sb +: lb, msgs)
           ok -> ok
 
--- | Detect 'end of file'.
+-- | Succeed if we have reached the end of the input string.
 endOfInput :: Parser ()
 endOfInput = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb
                                         then Right ((), s)
                                         else Left (sb +: lb, ["EOF"])
 
+-- | Return all of the remaining input as a single string.
 takeAll :: Parser LB.ByteString
 takeAll = Parser $ \(S sb lb n) ->
           let bs = sb +: lb
           in Right (bs, mkState LB.empty (n + LB.length bs))
 
+-- | Return exactly the given number of bytes.  If not enough are
+-- available, fail.
 takeCount :: Int -> Parser LB.ByteString
 takeCount k =
   Parser $ \(S sb lb n) ->
          then Right (h, mkState t (n + k'))
          else Left (bs, [show k ++ " bytes"])
 
--- | Consume characters while the predicate is true.
+-- | Consume bytes while the predicate succeeds.
 takeWhile :: (Word8 -> Bool) -> Parser LB.ByteString
 takeWhile p =
     Parser $ \(S sb lb n) ->
     in Right (h, mkState t (n + LB.length h))
 {-# INLINE takeWhile #-}
 
+-- | Consume bytes while the predicate fails.  If the predicate never
+-- succeeds, the entire input string is returned.
 takeTill :: (Word8 -> Bool) -> Parser LB.ByteString
 takeTill p =
   Parser $ \(S sb lb n) ->
-  case LB.break p (sb +: lb) of
-    (h,t) | LB.null t -> Left (h, [])
-          | otherwise -> Right (h, mkState t (n + LB.length h))
+  let (h,t) = LB.break p (sb +: lb)
+  in Right (h, mkState t (n + LB.length h))
 {-# INLINE takeTill #-}
 
+-- | Consume bytes while the predicate is true.  Fails if the
+-- predicate fails on the first byte.
 takeWhile1 :: (Word8 -> Bool) -> Parser LB.ByteString
 takeWhile1 p =
     Parser $ \(S sb lb n) ->
             | otherwise -> Right (h, mkState t (n + LB.length h))
 {-# INLINE takeWhile1 #-}
 
--- | Test that a parser returned a non-null ByteString.
+-- | Test that a parser returned a non-null 'LB.ByteString'.
 notEmpty :: Parser LB.ByteString -> Parser LB.ByteString 
 notEmpty p = Parser $ \s ->
              case unParser p s of
                    else o
                x -> x
 
--- | Parse some input with the given parser and return that input
--- without copying it.
+-- | Parse some input with the given parser, and return the input it
+-- consumed as a string.
 match :: Parser a -> Parser LB.ByteString
 match p = do bs <- getInput
              start <- getConsumed
              end <- getConsumed
              return (LB.take (end - start) bs)
 
-eitherP :: Parser a -> Parser b -> Parser (Either a b)
-eitherP a b = (Left <$> a) <|> (Right <$> b)
-{-# INLINE eitherP #-}
-
-peek :: Parser a -> Parser (Maybe a)
-peek p = Parser $ \s ->
-         case unParser p s of
-           Right (m, _) -> Right (Just m, s)
-           _ -> Right (Nothing, s)
-
+-- | Apply a parser without consuming any input.
 lookAhead :: Parser a -> Parser a
 lookAhead p = Parser $ \s ->
          case unParser p s of
            Right (m, _) -> Right (m, s)
-           Left (e, bs) -> Left (e, bs)
+           err -> err
 
-parseAt :: Parser a -> LB.ByteString -> Int64
+-- | Run a parser. The 'Int64' value is used as a base to count the
+-- number of bytes consumed.
+parseAt :: Parser a             -- ^ parser to run
+        -> LB.ByteString        -- ^ input to parse
+        -> Int64                -- ^ offset to count input from
         -> (LB.ByteString, Either ParseError (a, Int64))
 parseAt p bs n = 
     case unParser p (mkState bs n) of
       showError msgs = "Parser error, expected one of:\n" ++ unlines msgs
 
 -- | Run a parser.
-parse :: Parser a -> LB.ByteString
+parse :: Parser a               -- ^ parser to run
+      -> LB.ByteString          -- ^ input to parse
       -> (LB.ByteString, Either ParseError a)
 parse p bs = case parseAt p bs 0 of
                (bs', Right (a, _)) -> (bs', Right a)
                (bs', Left err) -> (bs', Left err)
 
+-- | Try out a parser, and print its result.
 parseTest :: (Show a) => Parser a -> LB.ByteString -> IO ()
 parseTest p s =
     case parse p s of

File Data/Attoparsec/Word8Boilerplate.h

     empty = zero
     (<|>) = plus
 
--- | Skip over characters while the predicate is true.
+-- | Skip over bytes while the predicate is true.
 skipWhile :: (Word8 -> Bool) -> PARSER ()
 skipWhile p = takeWhile p *> pure ()
 {-# INLINE skipWhile #-}
 
+-- | Match any byte.
 anyWord8 :: PARSER Word8
 anyWord8 = satisfy $ const True
 {-# INLINE anyWord8 #-}
 
--- | Satisfy a specific character.
+-- | Match a specific byte.
 word8 :: Word8 -> PARSER Word8
 word8 c = satisfy (== c) <?> show c
 {-# INLINE word8 #-}
 
--- | Satisfy a specific character.
+-- | Match any byte except the given one.
 notWord8 :: Word8 -> PARSER Word8
 notWord8 c = satisfy (/= c) <?> "not " ++ show c
 {-# INLINE notWord8 #-}

File attoparsec.cabal

                    Data.Attoparsec.Incremental
                    Data.Attoparsec.Incremental.Char8
                    Data.Attoparsec.FastSet
-                   Data.Attoparsec.Internal
+  other-modules:   Data.Attoparsec.Internal
   ghc-options:     -O2 -Wall -funbox-strict-fields
                    -fliberate-case-threshold=1000

File examples/RFC2616.hs

 {-# LANGUAGE OverloadedStrings #-}
 module RFC2616 where
 
-import Data.ParserCombinators.Attoparsec.Incremental.Char8
+import Data.Attoparsec.Incremental.Char8
 import Data.ByteString.Lazy.Char8 (ByteString)
 import qualified Data.ByteString.Lazy.Char8 as L
 import Data.ByteString.Char8 ()
 import Data.Char
 import Control.Monad
 import Prelude hiding (takeWhile)
+import Data.Time.Clock
+import Data.Time.Format
+import System.Locale
 
 date = rfc1123Date -- <|> rfc850Date <|> asctimeDate
 
+fallible :: Parser r (Maybe a) -> Parser r a
+fallible p = maybe mzero return =<< p
+
+rfc1123Date :: Parser r UTCTime
 rfc1123Date =
-    liftA3 (,,) (wkday <* string ", ") (date <* char ' ') (time <* string " GMT") <?> "RFC1123 date"
-  where wkday = oneWord "Mon Tue Wed Thu Fri Sat Sun"
-        oneWord = oneOf . map string . L.words
-        date = liftA3 (,,) (d2 <* char ' ') (month <* char ' ') d4
-        month = oneWord "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"
-        d2 = replicateM 2 (satisfy isDigit)
-        d4 = replicateM 4 (satisfy isDigit)
+  fallible (q <$> (manyTill anyChar (string " GMT")))
+    <?> "RFC1123 date"
+  where q = parseTime defaultTimeLocale "%a, %d %b %Y %T"
 
 time = liftA3 (,,) (d <* c) (d <* c) d <?> "time"
     where d = replicateM 2 (satisfy isDigit)
 header =
     (,) <$> (takeWhile fieldChar <* char ':' <* skipWhile space)
         <*> ((:) <$> tillEOL <*> many cont)
-    where tillEOL = takeTill (\c -> c == '\r' || c == '\n') <* eol
+    where tillEOL = takeTill newline <* eol
+          newline c = c == '\r' || c == '\n'
           fieldChar c = c /= ':' && c >= '!' && c <= '~' 
           cont = some (satisfy space) *> tillEOL
           space c = c == ' ' || c == '\t'