Commits

Bryan O'Sullivan committed e3bc107

Add support for the text package

Comments (0)

Files changed (21)

-^dist$
+^(?:cabal-dev|dist)$
+\.(?:aux|eventlog|h[ip]|log|[oa]|orig|prof|ps|rej|swp)$
+~$
 benchmarks/Arse
 benchmarks/med.txt
 benchmarks/tiny
 
 syntax: glob
-*.aux
-*.hi
-*.hp
-*.o
-*.orig
-*.out
-*.pdf
-*.prof
-*.ps
-*.rej
-*~
-.*.swp
 .\#*
 \#*

Data/Attoparsec.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient combinator parsing for 'B.ByteString' strings,
+-- Simple, efficient combinator parsing for 'ByteString' strings,
 -- loosely based on the Parsec library.
 
 module Data.Attoparsec
     (
-    -- * Differences from Parsec
-    -- $parsec
-
-    -- * Incremental input
-    -- $incremental
-
-    -- * Performance considerations
-    -- $performance
-
-    -- * Parser types
-      I.Parser
-    , Result(..)
-
-    -- ** Typeclass instances
-    -- $instances
-
-    -- * Running parsers
-    , parse
-    , feed
-    , I.parseOnly
-    , parseWith
-    , parseTest
-
-    -- ** Result conversion
-    , maybeResult
-    , eitherResult
-
-    -- * Combinators
-    , (I.<?>)
-    , I.try
-    , module Data.Attoparsec.Combinator
-
-    -- * Parsing individual bytes
-    , I.word8
-    , I.anyWord8
-    , I.notWord8
-    , I.satisfy
-    , I.satisfyWith
-    , I.skip
-
-    -- ** Byte classes
-    , I.inClass
-    , I.notInClass
-
-    -- * Efficient string handling
-    , I.string
-    , I.skipWhile
-    , I.take
-    , I.scan
-    , I.takeWhile
-    , I.takeWhile1
-    , I.takeTill
-
-    -- ** Consume all remaining input
-    , I.takeByteString
-    , I.takeLazyByteString
-
-    -- * State observation and manipulation functions
-    , I.endOfInput
-    , I.atEnd
+      module Data.Attoparsec.ByteString
     ) where
 
-import Data.Attoparsec.Combinator
-import qualified Data.Attoparsec.Internal as I
-import qualified Data.ByteString as B
-import Data.Attoparsec.Internal (Result(..), parse)
-
--- $parsec
---
--- Compared to Parsec 3, Attoparsec makes several tradeoffs.  It is
--- not intended for, or ideal for, all possible uses.
---
--- * While Attoparsec can consume input incrementally, Parsec cannot.
---   Incremental input is a huge deal for efficient and secure network
---   and system programming, since it gives much more control to users
---   of the library over matters such as resource usage and the I/O
---   model to use.
---
--- * Much of the performance advantage of Attoparsec is gained via
---   high-performance parsers such as 'I.takeWhile' and 'I.string'.
---   If you use complicated combinators that return lists of bytes or
---   characters, there is less performance difference between the two
---   libraries.
---
--- * Unlike Parsec 3, Attoparsec does not support being used as a
---   monad transformer.
---
--- * Attoparsec is specialised to deal only with strict 'B.ByteString'
---   input.  Efficiency concerns rule out both lists and lazy
---   bytestrings.  The usual use for lazy bytestrings would be to
---   allow consumption of very large input without a large footprint.
---   For this need, Attoparsec's incremental input provides an
---   excellent substitute, with much more control over when input
---   takes place.  If you must use lazy bytestrings, see the 'Lazy'
---   module, which feeds lazy chunks to a regular parser.
---
--- * Parsec parsers can produce more helpful error messages than
---   Attoparsec parsers.  This is a matter of focus: Attoparsec avoids
---   the extra book-keeping in favour of higher performance.
-
--- $incremental
---
--- Attoparsec supports incremental input, meaning that you can feed it
--- a bytestring that represents only part of the expected total amount
--- of data to parse. If your parser reaches the end of a fragment of
--- input and could consume more input, it will suspend parsing and
--- return a 'Partial' continuation.
---
--- Supplying the 'Partial' continuation with another bytestring will
--- resume parsing at the point where it was suspended. You must be
--- prepared for the result of the resumed parse to be another
--- 'Partial' continuation.
---
--- To indicate that you have no more input, supply the 'Partial'
--- continuation with an empty bytestring.
---
--- Remember that some parsing combinators will not return a result
--- until they reach the end of input.  They may thus cause 'Partial'
--- results to be returned.
---
--- If you do not need support for incremental input, consider using
--- the 'I.parseOnly' function to run your parser.  It will never
--- prompt for more input.
-
--- $performance
---
--- If you write an Attoparsec-based parser carefully, it can be
--- realistic to expect it to perform within a factor of 2 of a
--- hand-rolled C parser (measuring megabytes parsed per second).
---
--- To actually achieve high performance, there are a few guidelines
--- that it is useful to follow.
---
--- Use the 'B.ByteString'-oriented parsers whenever possible,
--- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'.  There is
--- about a factor of 100 difference in performance between the two
--- kinds of parser.
---
--- For very simple byte-testing predicates, write them by hand instead
--- of using 'I.inClass' or 'I.notInClass'.  For instance, both of
--- these predicates test for an end-of-line byte, but the first is
--- much faster than the second:
---
--- >endOfLine_fast w = w == 13 || w == 10
--- >endOfLine_slow   = inClass "\r\n"
---
--- Make active use of benchmarking and profiling tools to measure,
--- find the problems with, and improve the performance of your parser.
-
--- $instances
---
--- The 'I.Parser' type is an instance of the following classes:
---
--- * 'Monad', where 'fail' throws an exception (i.e. fails) with an
---   error message.
---
--- * 'Functor' and 'Applicative', which follow the usual definitions.
---
--- * 'MonadPlus', where 'mzero' fails (with no error message) and
---   'mplus' executes the right-hand parser if the left-hand one
---   fails.
---
--- * 'Alternative', which follows 'MonadPlus'.
---
--- The 'Result' type is an instance of 'Functor', where 'fmap'
--- transforms the value in a 'Done' result.
-
--- | If a parser has returned a 'Partial' result, supply it with more
--- input.
-feed :: Result r -> B.ByteString -> Result r
-feed f@(Fail _ _ _) _ = f
-feed (Partial k) d    = k d
-feed (Done bs r) d    = Done (B.append bs d) r
-{-# INLINE feed #-}
-
--- | Run a parser and print its result to standard output.
-parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO ()
-parseTest p s = print (parse p s)
-
--- | Run a parser with an initial input string, and a monadic action
--- that can supply more input if needed.
-parseWith :: Monad m =>
-             (m B.ByteString)
-          -- ^ An action that will be executed to provide the parser
-          -- with more input, if necessary.  The action must return an
-          -- 'B.empty' string when there is no more input available.
-          -> I.Parser a
-          -> B.ByteString
-          -- ^ Initial input for the parser.
-          -> m (Result a)
-parseWith refill p s = step $ parse p s
-  where step (Partial k) = (step . k) =<< refill
-        step r           = return r
-{-# INLINE parseWith #-}
-
--- | Convert a 'Result' value to a 'Maybe' value. A 'Partial' result
--- is treated as failure.
-maybeResult :: Result r -> Maybe r
-maybeResult (Done _ r) = Just r
-maybeResult _          = Nothing
-
--- | Convert a 'Result' value to an 'Either' value. A 'Partial' result
--- is treated as failure.
-eitherResult :: Result r -> Either String r
-eitherResult (Done _ r)     = Right r
-eitherResult (Fail _ _ msg) = Left msg
-eitherResult _              = Left "Result: incomplete input"
+import Data.Attoparsec.ByteString

Data/Attoparsec/ByteString.hs

+-- |
+-- Module      :  Data.Attoparsec.ByteString
+-- Copyright   :  Bryan O'Sullivan 2007-2011
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient combinator parsing for 'B.ByteString' strings,
+-- loosely based on the Parsec library.
+
+module Data.Attoparsec.ByteString
+    (
+    -- * Differences from Parsec
+    -- $parsec
+
+    -- * Incremental input
+    -- $incremental
+
+    -- * Performance considerations
+    -- $performance
+
+    -- * Parser types
+      I.Parser
+    , Result
+    , T.IResult(..)
+
+    -- * Running parsers
+    , parse
+    , feed
+    , I.parseOnly
+    , parseWith
+    , parseTest
+
+    -- ** Result conversion
+    , maybeResult
+    , eitherResult
+
+    -- * Combinators
+    , (I.<?>)
+    , I.try
+    , module Data.Attoparsec.Combinator
+
+    -- * Parsing individual bytes
+    , I.word8
+    , I.anyWord8
+    , I.notWord8
+    , I.satisfy
+    , I.satisfyWith
+    , I.skip
+
+    -- ** Byte classes
+    , I.inClass
+    , I.notInClass
+
+    -- * Efficient string handling
+    , I.string
+    , I.skipWhile
+    , I.take
+    , I.scan
+    , I.takeWhile
+    , I.takeWhile1
+    , I.takeTill
+
+    -- ** Consume all remaining input
+    , I.takeByteString
+    , I.takeLazyByteString
+
+    -- * State observation and manipulation functions
+    , I.endOfInput
+    , I.atEnd
+    ) where
+
+import Data.Attoparsec.Combinator
+import qualified Data.Attoparsec.ByteString.Internal as I
+import qualified Data.ByteString as B
+import Data.Attoparsec.ByteString.Internal (Result, parse)
+import qualified Data.Attoparsec.Internal.Types as T
+
+-- $parsec
+--
+-- Compared to Parsec 3, Attoparsec makes several tradeoffs.  It is
+-- not intended for, or ideal for, all possible uses.
+--
+-- * While Attoparsec can consume input incrementally, Parsec cannot.
+--   Incremental input is a huge deal for efficient and secure network
+--   and system programming, since it gives much more control to users
+--   of the library over matters such as resource usage and the I/O
+--   model to use.
+--
+-- * Much of the performance advantage of Attoparsec is gained via
+--   high-performance parsers such as 'I.takeWhile' and 'I.string'.
+--   If you use complicated combinators that return lists of bytes or
+--   characters, there is less performance difference between the two
+--   libraries.
+--
+-- * Unlike Parsec 3, Attoparsec does not support being used as a
+--   monad transformer.
+--
+-- * Attoparsec is specialised to deal only with strict 'B.ByteString'
+--   input.  Efficiency concerns rule out both lists and lazy
+--   bytestrings.  The usual use for lazy bytestrings would be to
+--   allow consumption of very large input without a large footprint.
+--   For this need, Attoparsec's incremental input provides an
+--   excellent substitute, with much more control over when input
+--   takes place.  If you must use lazy bytestrings, see the 'Lazy'
+--   module, which feeds lazy chunks to a regular parser.
+--
+-- * Parsec parsers can produce more helpful error messages than
+--   Attoparsec parsers.  This is a matter of focus: Attoparsec avoids
+--   the extra book-keeping in favour of higher performance.
+
+-- $incremental
+--
+-- Attoparsec supports incremental input, meaning that you can feed it
+-- a bytestring that represents only part of the expected total amount
+-- of data to parse. If your parser reaches the end of a fragment of
+-- input and could consume more input, it will suspend parsing and
+-- return a 'T.Partial' continuation.
+--
+-- Supplying the 'T.Partial' continuation with another bytestring will
+-- resume parsing at the point where it was suspended. You must be
+-- prepared for the result of the resumed parse to be another
+-- 'T.Partial' continuation.
+--
+-- To indicate that you have no more input, supply the 'T.Partial'
+-- continuation with an empty bytestring.
+--
+-- Remember that some parsing combinators will not return a result
+-- until they reach the end of input.  They may thus cause 'T.Partial'
+-- results to be returned.
+--
+-- If you do not need support for incremental input, consider using
+-- the 'I.parseOnly' function to run your parser.  It will never
+-- prompt for more input.
+
+-- $performance
+--
+-- If you write an Attoparsec-based parser carefully, it can be
+-- realistic to expect it to perform within a factor of 2 of a
+-- hand-rolled C parser (measuring megabytes parsed per second).
+--
+-- To actually achieve high performance, there are a few guidelines
+-- that it is useful to follow.
+--
+-- Use the 'B.ByteString'-oriented parsers whenever possible,
+-- e.g. 'I.takeWhile1' instead of 'many1' 'I.anyWord8'.  There is
+-- about a factor of 100 difference in performance between the two
+-- kinds of parser.
+--
+-- For very simple byte-testing predicates, write them by hand instead
+-- of using 'I.inClass' or 'I.notInClass'.  For instance, both of
+-- these predicates test for an end-of-line byte, but the first is
+-- much faster than the second:
+--
+-- >endOfLine_fast w = w == 13 || w == 10
+-- >endOfLine_slow   = inClass "\r\n"
+--
+-- Make active use of benchmarking and profiling tools to measure,
+-- find the problems with, and improve the performance of your parser.
+
+-- | If a parser has returned a 'T.Partial' result, supply it with more
+-- input.
+feed :: Result r -> B.ByteString -> Result r
+feed f@(T.Fail _ _ _) _ = f
+feed (T.Partial k) d    = k d
+feed (T.Done bs r) d    = T.Done (B.append bs d) r
+{-# INLINE feed #-}
+
+-- | Run a parser and print its result to standard output.
+parseTest :: (Show a) => I.Parser a -> B.ByteString -> IO ()
+parseTest p s = print (parse p s)
+
+-- | Run a parser with an initial input string, and a monadic action
+-- that can supply more input if needed.
+parseWith :: Monad m =>
+             (m B.ByteString)
+          -- ^ An action that will be executed to provide the parser
+          -- with more input, if necessary.  The action must return an
+          -- 'B.empty' string when there is no more input available.
+          -> I.Parser a
+          -> B.ByteString
+          -- ^ Initial input for the parser.
+          -> m (Result a)
+parseWith refill p s = step $ parse p s
+  where step (T.Partial k) = (step . k) =<< refill
+        step r             = return r
+{-# INLINE parseWith #-}
+
+-- | Convert a 'Result' value to a 'Maybe' value. A 'T.Partial' result
+-- is treated as failure.
+maybeResult :: Result r -> Maybe r
+maybeResult (T.Done _ r) = Just r
+maybeResult _            = Nothing
+
+-- | Convert a 'Result' value to an 'Either' value. A 'T.Partial'
+-- result is treated as failure.
+eitherResult :: Result r -> Either String r
+eitherResult (T.Done _ r)     = Right r
+eitherResult (T.Fail _ _ msg) = Left msg
+eitherResult _                = Left "Result: incomplete input"

Data/Attoparsec/ByteString/Char8.hs

+{-# LANGUAGE BangPatterns, FlexibleInstances, TypeSynonymInstances #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+-- |
+-- Module      :  Data.Attoparsec.ByteString.Char8
+-- Copyright   :  Bryan O'Sullivan 2007-2011
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient, character-oriented combinator parsing for
+-- 'B.ByteString' strings, loosely based on the Parsec library.
+
+module Data.Attoparsec.ByteString.Char8
+    (
+    -- * Character encodings
+    -- $encodings
+
+    -- * Parser types
+      Parser
+    , A.Result
+
+    -- * Running parsers
+    , A.parse
+    , A.feed
+    , A.parseOnly
+    , A.parseTest
+    , A.parseWith
+
+    -- ** Result conversion
+    , A.maybeResult
+    , A.eitherResult
+
+    -- * Combinators
+    , (I.<?>)
+    , I.try
+    , module Data.Attoparsec.Combinator
+
+    -- * Parsing individual characters
+    , char
+    , char8
+    , anyChar
+    , notChar
+    , satisfy
+
+    -- ** Special character parsers
+    , digit
+    , letter_iso8859_15
+    , letter_ascii
+    , space
+
+    -- ** Fast predicates
+    , isDigit
+    , isDigit_w8
+    , isAlpha_iso8859_15
+    , isAlpha_ascii
+    , isSpace
+    , isSpace_w8
+
+    -- *** Character classes
+    , inClass
+    , notInClass
+
+    -- * Efficient string handling
+    , I.string
+    , stringCI
+    , skipSpace
+    , skipWhile
+    , I.take
+    , scan
+    , takeWhile
+    , takeWhile1
+    , takeTill
+
+    -- ** Consume all remaining input
+    , I.takeByteString
+    , I.takeLazyByteString
+
+    -- * Text parsing
+    , I.endOfLine
+    , isEndOfLine
+    , isHorizontalSpace
+
+    -- * Numeric parsers
+    , decimal
+    , hexadecimal
+    , signed
+    , double
+    , Number(..)
+    , number
+    , rational
+
+    -- * State observation and manipulation functions
+    , I.endOfInput
+    , I.atEnd
+    ) where
+
+import Control.Applicative ((*>), (<$>), (<|>))
+import Data.Attoparsec.ByteString.FastSet (charClass, memberChar)
+import Data.Attoparsec.ByteString.Internal (Parser, (<?>))
+import Data.Attoparsec.Combinator
+import Data.Attoparsec.Number (Number(..))
+import Data.Bits (Bits, (.|.), shiftL)
+import Data.ByteString.Internal (c2w, w2c)
+import Data.Int (Int8, Int16, Int32, Int64)
+import Data.Ratio ((%))
+import Data.String (IsString(..))
+import Data.Word (Word8, Word16, Word32, Word64, Word)
+import Prelude hiding (takeWhile)
+import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.ByteString.Internal as I
+import qualified Data.ByteString as B8
+import qualified Data.ByteString.Char8 as B
+
+instance IsString (Parser B.ByteString) where
+    fromString = I.string . B.pack
+
+-- $encodings
+--
+-- This module is intended for parsing text that is
+-- represented using an 8-bit character set, e.g. ASCII or
+-- ISO-8859-15.  It /does not/ make any attempt to deal with character
+-- encodings, multibyte characters, or wide characters.  In
+-- particular, all attempts to use characters above code point U+00FF
+-- will give wrong answers.
+--
+-- Code points below U+0100 are simply translated to and from their
+-- numeric values, so e.g. the code point U+00A4 becomes the byte
+-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
+-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
+-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.
+
+-- ASCII-specific but fast, oh yes.
+toLower :: Word8 -> Word8
+toLower w | w >= 65 && w <= 90 = w + 32
+          | otherwise          = w
+
+-- | Satisfy a literal string, ignoring case.
+stringCI :: B.ByteString -> Parser B.ByteString
+stringCI = I.stringTransform (B8.map toLower)
+{-# INLINE stringCI #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser requires the predicate to succeed on at least one byte
+-- of input: it will fail if the predicate never returns 'True' or if
+-- there is no input left.
+takeWhile1 :: (Char -> Bool) -> Parser B.ByteString
+takeWhile1 p = I.takeWhile1 (p . w2c)
+{-# INLINE takeWhile1 #-}
+
+-- | The parser @satisfy p@ succeeds for any byte for which the
+-- predicate @p@ returns 'True'. Returns the byte that is actually
+-- parsed.
+--
+-- >digit = satisfy isDigit
+-- >    where isDigit c = c >= '0' && c <= '9'
+satisfy :: (Char -> Bool) -> Parser Char
+satisfy = I.satisfyWith w2c
+{-# INLINE satisfy #-}
+
+-- | Match a letter, in the ISO-8859-15 encoding.
+letter_iso8859_15 :: Parser Char
+letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15"
+{-# INLINE letter_iso8859_15 #-}
+
+-- | Match a letter, in the ASCII encoding.
+letter_ascii :: Parser Char
+letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii"
+{-# INLINE letter_ascii #-}
+
+-- | A fast alphabetic predicate for the ISO-8859-15 encoding
+--
+-- /Note/: For all character encodings other than ISO-8859-15, and
+-- almost all Unicode code points above U+00A3, this predicate gives
+-- /wrong answers/.
+isAlpha_iso8859_15 :: Char -> Bool
+isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+                       (c >= '\166' && moby c)
+  where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247"
+        {-# NOINLINE moby #-}
+{-# INLINE isAlpha_iso8859_15 #-}
+
+-- | A fast alphabetic predicate for the ASCII encoding
+--
+-- /Note/: For all character encodings other than ASCII, and
+-- almost all Unicode code points above U+007F, this predicate gives
+-- /wrong answers/.
+isAlpha_ascii :: Char -> Bool
+isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+{-# INLINE isAlpha_ascii #-}
+
+-- | Parse a single digit.
+digit :: Parser Char
+digit = satisfy isDigit <?> "digit"
+{-# INLINE digit #-}
+
+-- | A fast digit predicate.
+isDigit :: Char -> Bool
+isDigit c = c >= '0' && c <= '9'
+{-# INLINE isDigit #-}
+
+-- | A fast digit predicate.
+isDigit_w8 :: Word8 -> Bool
+isDigit_w8 w = w >= 48 && w <= 57
+{-# INLINE isDigit_w8 #-}
+
+-- | Match any character.
+anyChar :: Parser Char
+anyChar = satisfy $ const True
+{-# INLINE anyChar #-}
+
+-- | Fast predicate for matching ASCII space characters.
+--
+-- /Note/: This predicate only gives correct answers for the ASCII
+-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
+-- space) as a space character, even though it is a valid ISO-8859-15
+-- byte. For a Unicode-aware and only slightly slower predicate,
+-- use 'Data.Char.isSpace'
+isSpace :: Char -> Bool
+isSpace c = (c == ' ') || ('\t' <= c && c <= '\r')
+{-# INLINE isSpace #-}
+
+-- | Fast 'Word8' predicate for matching ASCII space characters.
+isSpace_w8 :: Word8 -> Bool
+isSpace_w8 w = (w == 32) || (9 <= w && w <= 13)
+{-# INLINE isSpace_w8 #-}
+
+
+-- | Parse a space character.
+--
+-- /Note/: This parser only gives correct answers for the ASCII
+-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
+-- space) as a space character, even though it is a valid ISO-8859-15
+-- byte.
+space :: Parser Char
+space = satisfy isSpace <?> "space"
+{-# INLINE space #-}
+
+-- | Match a specific character.
+char :: Char -> Parser Char
+char c = satisfy (== c) <?> [c]
+{-# INLINE char #-}
+
+-- | Match a specific character, but return its 'Word8' value.
+char8 :: Char -> Parser Word8
+char8 c = I.satisfy (== c2w c) <?> [c]
+{-# INLINE char8 #-}
+
+-- | Match any character except the given one.
+notChar :: Char -> Parser Char
+notChar c = satisfy (/= c) <?> "not " ++ [c]
+{-# INLINE notChar #-}
+
+-- | Match any character in a set.
+--
+-- >vowel = inClass "aeiou"
+--
+-- Range notation is supported.
+--
+-- >halfAlphabet = inClass "a-nA-N"
+--
+-- To add a literal \'-\' to a set, place it at the beginning or end
+-- of the string.
+inClass :: String -> Char -> Bool
+inClass s = (`memberChar` mySet)
+    where mySet = charClass s
+{-# INLINE inClass #-}
+
+-- | Match any character not in a set.
+notInClass :: String -> Char -> Bool
+notInClass s = not . inClass s
+{-# INLINE notInClass #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'False' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeWhile :: (Char -> Bool) -> Parser B.ByteString
+takeWhile p = I.takeWhile (p . w2c)
+{-# INLINE takeWhile #-}
+
+-- | A stateful scanner.  The predicate consumes and transforms a
+-- state argument, and each transformed state is passed to successive
+-- invocations of the predicate on each byte of the input until one
+-- returns 'Nothing' or the input ends.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'Nothing' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString
+scan s0 p = I.scan s0 (\s -> p s . w2c)
+{-# INLINE scan #-}
+
+-- | Consume input as long as the predicate returns 'False'
+-- (i.e. until it returns 'True'), and return the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'True' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeTill :: (Char -> Bool) -> Parser B.ByteString
+takeTill p = I.takeTill (p . w2c)
+{-# INLINE takeTill #-}
+
+-- | Skip past input for as long as the predicate returns 'True'.
+skipWhile :: (Char -> Bool) -> Parser ()
+skipWhile p = I.skipWhile (p . w2c)
+{-# INLINE skipWhile #-}
+
+-- | Skip over white space.
+skipSpace :: Parser ()
+skipSpace = I.skipWhile isSpace_w8
+{-# INLINE skipSpace #-}
+
+-- | A predicate that matches either a carriage return @\'\\r\'@ or
+-- newline @\'\\n\'@ character.
+isEndOfLine :: Word8 -> Bool
+isEndOfLine w = w == 13 || w == 10
+{-# INLINE isEndOfLine #-}
+
+-- | A predicate that matches either a space @\' \'@ or horizontal tab
+-- @\'\\t\'@ character.
+isHorizontalSpace :: Word8 -> Bool
+isHorizontalSpace w = w == 32 || w == 9
+{-# INLINE isHorizontalSpace #-}
+
+-- | Parse and decode an unsigned hexadecimal number.  The hex digits
+-- @\'a\'@ through @\'f\'@ may be upper or lower case.
+--
+-- This parser does not accept a leading @\"0x\"@ string.
+hexadecimal :: (Integral a, Bits a) => Parser a
+hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit
+  where
+    isHexDigit w = (w >= 48 && w <= 57) ||
+                   (w >= 97 && w <= 102) ||
+                   (w >= 65 && w <= 90)
+    step a w | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48)
+             | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87)
+             | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55)
+{-# SPECIALISE hexadecimal :: Parser Int #-}
+{-# SPECIALISE hexadecimal :: Parser Int8 #-}
+{-# SPECIALISE hexadecimal :: Parser Int16 #-}
+{-# SPECIALISE hexadecimal :: Parser Int32 #-}
+{-# SPECIALISE hexadecimal :: Parser Int64 #-}
+{-# SPECIALISE hexadecimal :: Parser Integer #-}
+{-# SPECIALISE hexadecimal :: Parser Word #-}
+{-# SPECIALISE hexadecimal :: Parser Word8 #-}
+{-# SPECIALISE hexadecimal :: Parser Word16 #-}
+{-# SPECIALISE hexadecimal :: Parser Word32 #-}
+{-# SPECIALISE hexadecimal :: Parser Word64 #-}
+
+-- | Parse and decode an unsigned decimal number.
+decimal :: Integral a => Parser a
+decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig
+  where isDig w  = w >= 48 && w <= 57
+        step a w = a * 10 + fromIntegral (w - 48)
+{-# SPECIALISE decimal :: Parser Int #-}
+{-# SPECIALISE decimal :: Parser Int8 #-}
+{-# SPECIALISE decimal :: Parser Int16 #-}
+{-# SPECIALISE decimal :: Parser Int32 #-}
+{-# SPECIALISE decimal :: Parser Int64 #-}
+{-# SPECIALISE decimal :: Parser Integer #-}
+{-# SPECIALISE decimal :: Parser Word #-}
+{-# SPECIALISE decimal :: Parser Word8 #-}
+{-# SPECIALISE decimal :: Parser Word16 #-}
+{-# SPECIALISE decimal :: Parser Word32 #-}
+{-# SPECIALISE decimal :: Parser Word64 #-}
+
+-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign
+-- character.
+signed :: Num a => Parser a -> Parser a
+{-# SPECIALISE signed :: Parser Int -> Parser Int #-}
+{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-}
+{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-}
+{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-}
+{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-}
+{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-}
+signed p = (negate <$> (char8 '-' *> p))
+       <|> (char8 '+' *> p)
+       <|> p
+
+-- | Parse a rational number.
+--
+-- This parser accepts an optional leading sign character, followed by
+-- at least one decimal digit.  The syntax similar to that accepted by
+-- the 'read' function, with the exception that a trailing @\'.\'@ or
+-- @\'e\'@ /not/ followed by a number is not consumed.
+--
+-- Examples with behaviour identical to 'read', if you feed an empty
+-- continuation to the first result:
+--
+-- >rational "3"     == Done 3.0 ""
+-- >rational "3.1"   == Done 3.1 ""
+-- >rational "3e4"   == Done 30000.0 ""
+-- >rational "3.1e4" == Done 31000.0, ""
+
+-- Examples with behaviour identical to 'read':
+--
+-- >rational ".3"    == Fail "input does not start with a digit"
+-- >rational "e3"    == Fail "input does not start with a digit"
+--
+-- Examples of differences from 'read':
+--
+-- >rational "3.foo" == Done 3.0 ".foo"
+-- >rational "3e"    == Done 3.0 "e"
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+rational :: Fractional a => Parser a
+{-# SPECIALIZE rational :: Parser Double #-}
+{-# SPECIALIZE rational :: Parser Float #-}
+{-# SPECIALIZE rational :: Parser Rational #-}
+rational = floaty $ \real frac fracDenom -> fromRational $
+                     real % 1 + frac % fracDenom
+
+-- | Parse a rational number.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+--
+-- /Note/: This function is almost ten times faster than 'rational',
+-- but is slightly less accurate.
+--
+-- The 'Double' type supports about 16 decimal places of accuracy.
+-- For 94.2% of numbers, this function and 'rational' give identical
+-- results, but for the remaining 5.8%, this function loses precision
+-- around the 15th decimal place.  For 0.001% of numbers, this
+-- function will lose precision at the 13th or 14th decimal place.
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+double :: Parser Double
+double = floaty asDouble
+
+asDouble :: Integer -> Integer -> Integer -> Double
+asDouble real frac fracDenom =
+    fromIntegral real + fromIntegral frac / fromIntegral fracDenom
+{-# INLINE asDouble #-}
+
+-- | Parse a number, attempting to preserve both speed and precision.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+--
+-- /Note/: This function is almost ten times faster than 'rational'.
+-- On integral inputs, it gives perfectly accurate answers, and on
+-- floating point inputs, it is slightly less accurate than
+-- 'rational'.
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+number :: Parser Number
+number = floaty $ \real frac fracDenom ->
+         if frac == 0 && fracDenom == 0
+         then I real
+         else D (asDouble real frac fracDenom)
+{-# INLINE number #-}
+
+data T = T !Integer !Int
+
+floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Parser a
+{-# INLINE floaty #-}
+floaty f = do
+  let minus = 45
+      plus  = 43
+  !positive <- ((== plus) <$> I.satisfy (\c -> c == minus || c == plus)) <|>
+               return True
+  real <- decimal
+  let tryFraction = do
+        let dot = 46
+        _ <- I.satisfy (==dot)
+        ds <- I.takeWhile isDigit_w8
+        case I.parseOnly decimal ds of
+                Right n -> return $ T n (B.length ds)
+                _       -> fail "no digits after decimal"
+  T fraction fracDigits <- tryFraction <|> return (T 0 0)
+  let littleE = 101
+      bigE    = 69
+      e w = w == littleE || w == bigE
+  power <- (I.satisfy e *> signed decimal) <|> return (0::Int)
+  let n = if fracDigits == 0
+          then if power == 0
+               then fromIntegral real
+               else fromIntegral real * (10 ^^ power)
+          else if power == 0
+               then f real fraction (10 ^ fracDigits)
+               else f real fraction (10 ^ fracDigits) * (10 ^^ power)
+  return $ if positive
+           then n
+           else -n

Data/Attoparsec/ByteString/FastSet.hs

+{-# LANGUAGE BangPatterns, MagicHash #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Attoparsec.ByteString.FastSet
+-- Copyright   :  Bryan O'Sullivan 2008
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Fast set membership tests for 'Word8' and 8-bit 'Char' values.  The
+-- set representation is unboxed for efficiency.  For small sets, we
+-- test for membership using a binary search.  For larger sets, we use
+-- a lookup table.
+-- 
+-----------------------------------------------------------------------------
+module Data.Attoparsec.ByteString.FastSet
+    (
+    -- * Data type
+      FastSet
+    -- * Construction
+    , fromList
+    , set
+    -- * Lookup
+    , memberChar
+    , memberWord8
+    -- * Debugging
+    , fromSet
+    -- * Handy interface
+    , charClass
+    ) where
+
+import Data.Bits ((.&.), (.|.))
+import Foreign.Storable (peekByteOff, pokeByteOff)
+import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#)
+import GHC.Word (Word8(W8#))
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Internal as I
+import qualified Data.ByteString.Unsafe as U
+
+data FastSet = Sorted { fromSet :: !B.ByteString }
+             | Table  { fromSet :: !B.ByteString }
+    deriving (Eq, Ord)
+
+instance Show FastSet where
+    show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s)
+    show (Table _) = "FastSet Table"
+
+-- | The lower bound on the size of a lookup table.  We choose this to
+-- balance table density against performance.
+tableCutoff :: Int
+tableCutoff = 8
+
+-- | Create a set.
+set :: B.ByteString -> FastSet
+set s | B.length s < tableCutoff = Sorted . B.sort $ s
+      | otherwise                = Table . mkTable $ s
+
+fromList :: [Word8] -> FastSet
+fromList = set . B.pack
+
+data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8
+
+shiftR :: Int -> Int -> Int
+shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
+
+shiftL :: Word8 -> Int -> Word8
+shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#))
+
+index :: Int -> I
+index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7))
+{-# INLINE index #-}
+
+-- | Check the set for membership.
+memberWord8 :: Word8 -> FastSet -> Bool
+memberWord8 w (Table t)  =
+    let I byte bit = index (fromIntegral w)
+    in  U.unsafeIndex t byte .&. bit /= 0
+memberWord8 w (Sorted s) = search 0 (B.length s - 1)
+    where search lo hi
+              | hi < lo = False
+              | otherwise =
+                  let mid = (lo + hi) `div` 2
+                  in case compare w (U.unsafeIndex s mid) of
+                       GT -> search (mid + 1) hi
+                       LT -> search lo (mid - 1)
+                       _ -> True
+
+-- | Check the set for membership.  Only works with 8-bit characters:
+-- characters above code point 255 will give wrong answers.
+memberChar :: Char -> FastSet -> Bool
+memberChar c = memberWord8 (I.c2w c)
+{-# INLINE memberChar #-}
+
+mkTable :: B.ByteString -> B.ByteString
+mkTable s = I.unsafeCreate 32 $ \t -> do
+            _ <- I.memset t 0 32
+            U.unsafeUseAsCStringLen s $ \(p, l) ->
+              let loop n | n == l = return ()
+                         | otherwise = do
+                    c <- peekByteOff p n :: IO Word8
+                    let I byte bit = index (fromIntegral c)
+                    prev <- peekByteOff t byte :: IO Word8
+                    pokeByteOff t byte (prev .|. bit)
+                    loop (n + 1)
+              in loop 0
+
+charClass :: String -> FastSet
+charClass = set . B8.pack . go
+    where go (a:'-':b:xs) = [a..b] ++ go xs
+          go (x:xs) = x : go xs
+          go _ = ""

Data/Attoparsec/ByteString/Internal.hs

+{-# LANGUAGE BangPatterns, Rank2Types, OverloadedStrings, RecordWildCards #-}
+-- |
+-- Module      :  Data.Attoparsec.ByteString.Internal
+-- Copyright   :  Bryan O'Sullivan 2007-2011
+-- License     :  BSD3
+--
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient parser combinators for 'B.ByteString' strings,
+-- loosely based on the Parsec library.
+
+module Data.Attoparsec.ByteString.Internal
+    (
+    -- * Parser types
+      Parser
+    , Result
+
+    -- * Running parsers
+    , parse
+    , parseOnly
+
+    -- * Combinators
+    , (<?>)
+    , try
+    , module Data.Attoparsec.Combinator
+
+    -- * Parsing individual bytes
+    , satisfy
+    , satisfyWith
+    , anyWord8
+    , skip
+    , word8
+    , notWord8
+
+    -- ** Byte classes
+    , inClass
+    , notInClass
+
+    -- * Parsing more complicated structures
+    , storable
+
+    -- * Efficient string handling
+    , skipWhile
+    , string
+    , stringTransform
+    , take
+    , scan
+    , takeWhile
+    , takeWhile1
+    , takeTill
+
+    -- ** Consume all remaining input
+    , takeByteString
+    , takeLazyByteString
+
+    -- * State observation and manipulation functions
+    , endOfInput
+    , atEnd
+    , ensure
+
+    -- * Utilities
+    , endOfLine
+    ) where
+
+import Control.Applicative ((<|>), (<$>))
+import Control.Monad (when)
+import Data.Attoparsec.ByteString.FastSet (charClass, memberWord8)
+import Data.Attoparsec.Combinator
+import Data.Attoparsec.Internal.Types
+    hiding (Parser, Input, Added, Failure, Success)
+import Data.Word (Word8)
+import Foreign.ForeignPtr (withForeignPtr)
+import Foreign.Ptr (castPtr, minusPtr, plusPtr)
+import Foreign.Storable (Storable(peek, sizeOf))
+import Prelude hiding (getChar, take, takeWhile)
+import System.IO.Unsafe (unsafePerformIO)
+import qualified Data.Attoparsec.Internal.Types as T
+import qualified Data.ByteString as B8
+import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Internal as B
+import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString.Unsafe as B
+
+type Parser a = T.Parser B.ByteString a
+type Result a = IResult B.ByteString a
+type Input = T.Input B.ByteString
+type Added = T.Added B.ByteString
+type Failure r = T.Failure B.ByteString r
+type Success a r = T.Success B.ByteString a r
+
+-- | If at least @n@ bytes of input are available, return the current
+-- input, otherwise fail.
+ensure :: Int -> Parser B.ByteString
+ensure !n = T.Parser $ \i0 a0 m0 kf ks ->
+    if B.length (unI i0) >= n
+    then ks i0 a0 m0 (unI i0)
+    else T.runParser (demandInput >> ensure n) i0 a0 m0 kf ks
+
+-- | Ask for input.  If we receive any, pass it to a success
+-- continuation, otherwise to a failure continuation.
+prompt :: Input -> Added -> More
+       -> (Input -> Added -> More -> Result r)
+       -> (Input -> Added -> More -> Result r)
+       -> Result r
+prompt i0 a0 _m0 kf ks = Partial $ \s ->
+    if B.null s
+    then kf i0 a0 Complete
+    else ks (I (unI i0 <> s)) (A (unA a0 <> s)) Incomplete
+
+-- | Immediately demand more input via a 'Partial' continuation
+-- result.
+demandInput :: Parser ()
+demandInput = T.Parser $ \i0 a0 m0 kf ks ->
+    if m0 == Complete
+    then kf i0 a0 m0 ["demandInput"] "not enough bytes"
+    else let kf' i a m = kf i a m ["demandInput"] "not enough bytes"
+             ks' i a m = ks i a m ()
+         in prompt i0 a0 m0 kf' ks'
+
+-- | This parser always succeeds.  It returns 'True' if any input is
+-- available either immediately or on demand, and 'False' if the end
+-- of all input has been reached.
+wantInput :: Parser Bool
+wantInput = T.Parser $ \i0 a0 m0 _kf ks ->
+  case () of
+    _ | not (B.null (unI i0)) -> ks i0 a0 m0 True
+      | m0 == Complete  -> ks i0 a0 m0 False
+      | otherwise       -> let kf' i a m = ks i a m False
+                               ks' i a m = ks i a m True
+                           in prompt i0 a0 m0 kf' ks'
+
+get :: Parser B.ByteString
+get  = T.Parser $ \i0 a0 m0 _kf ks -> ks i0 a0 m0 (unI i0)
+
+put :: B.ByteString -> Parser ()
+put s = T.Parser $ \_i0 a0 m0 _kf ks -> ks (I s) a0 m0 ()
+
+-- | Attempt a parse, and if it fails, rewind the input so that no
+-- input appears to have been consumed.
+--
+-- This combinator is provided for compatibility with Parsec.
+-- Attoparsec parsers always backtrack on failure.
+try :: Parser a -> Parser a
+try p = p
+{-# INLINE try #-}
+
+-- | The parser @satisfy p@ succeeds for any byte for which the
+-- predicate @p@ returns 'True'. Returns the byte that is actually
+-- parsed.
+--
+-- >digit = satisfy isDigit
+-- >    where isDigit w = w >= 48 && w <= 57
+satisfy :: (Word8 -> Bool) -> Parser Word8
+satisfy p = do
+  s <- ensure 1
+  let w = B.unsafeHead s
+  if p w
+    then put (B.unsafeTail s) >> return w
+    else fail "satisfy"
+
+-- | The parser @skip p@ succeeds for any byte for which the predicate
+-- @p@ returns 'True'.
+--
+-- >skipDigit = skip isDigit
+-- >    where isDigit w = w >= 48 && w <= 57
+skip :: (Word8 -> Bool) -> Parser ()
+skip p = do
+  s <- ensure 1
+  if p (B.unsafeHead s)
+    then put (B.unsafeTail s)
+    else fail "skip"
+
+-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if
+-- the predicate @p@ returns 'True' on the transformed value. The
+-- parser returns the transformed byte that was parsed.
+satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a
+satisfyWith f p = do
+  s <- ensure 1
+  let c = f (B.unsafeHead s)
+  if p c
+    then put (B.unsafeTail s) >> return c
+    else fail "satisfyWith"
+
+storable :: Storable a => Parser a
+storable = hack undefined
+ where
+  hack :: Storable b => b -> Parser b
+  hack dummy = do
+    (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy)
+    return . B.inlinePerformIO . withForeignPtr fp $ \p ->
+        peek (castPtr $ p `plusPtr` o)
+
+-- | Consume @n@ bytes of input, but succeed only if the predicate
+-- returns 'True'.
+takeWith :: Int -> (B.ByteString -> Bool) -> Parser B.ByteString
+takeWith n p = do
+  s <- ensure n
+  let h = B.unsafeTake n s
+      t = B.unsafeDrop n s
+  if p h
+    then put t >> return h
+    else fail "takeWith"
+
+-- | Consume exactly @n@ bytes of input.
+take :: Int -> Parser B.ByteString
+take n = takeWith n (const True)
+{-# INLINE take #-}
+
+-- | @string s@ parses a sequence of bytes that identically match
+-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
+-- input if it fails (even if a partial match).
+--
+-- /Note/: The behaviour of this parser is different to that of the
+-- similarly-named parser in Parsec, as this one is all-or-nothing.
+-- To illustrate the difference, the following parser will fail under
+-- Parsec given an input of @"for"@:
+--
+-- >string "foo" <|> string "for"
+--
+-- The reason for its failure is that that the first branch is a
+-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
+-- before failing.  In Attoparsec, the above parser will /succeed/ on
+-- that input, because the failed first branch will consume nothing.
+string :: B.ByteString -> Parser B.ByteString
+string s = takeWith (B.length s) (==s)
+{-# INLINE string #-}
+
+stringTransform :: (B.ByteString -> B.ByteString) -> B.ByteString
+                -> Parser B.ByteString
+stringTransform f s = takeWith (B.length s) ((==f s) . f)
+{-# INLINE stringTransform #-}
+
+-- | Skip past input for as long as the predicate returns 'True'.
+skipWhile :: (Word8 -> Bool) -> Parser ()
+skipWhile p = go
+ where
+  go = do
+    t <- B8.dropWhile p <$> get
+    put t
+    when (B.null t) $ do
+      input <- wantInput
+      when input go
+{-# INLINE skipWhile #-}
+
+-- | Consume input as long as the predicate returns 'False'
+-- (i.e. until it returns 'True'), and return the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'True' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeTill :: (Word8 -> Bool) -> Parser B.ByteString
+takeTill p = takeWhile (not . p)
+{-# INLINE takeTill #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'False' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeWhile :: (Word8 -> Bool) -> Parser B.ByteString
+takeWhile p = (B.concat . reverse) `fmap` go []
+ where
+  go acc = do
+    (h,t) <- B8.span p <$> get
+    put t
+    if B.null t
+      then do
+        input <- wantInput
+        if input
+          then go (h:acc)
+          else return (h:acc)
+      else return (h:acc)
+
+takeRest :: Parser [B.ByteString]
+takeRest = go []
+ where
+  go acc = do
+    input <- wantInput
+    if input
+      then do
+        s <- get
+        put B.empty
+        go (s:acc)
+      else return (reverse acc)
+
+-- | Consume all remaining input and return it as a single string.
+takeByteString :: Parser B.ByteString
+takeByteString = B.concat `fmap` takeRest
+
+-- | Consume all remaining input and return it as a single string.
+takeLazyByteString :: Parser L.ByteString
+takeLazyByteString = L.fromChunks `fmap` takeRest
+
+data T s = T {-# UNPACK #-} !Int s
+
+-- | A stateful scanner.  The predicate consumes and transforms a
+-- state argument, and each transformed state is passed to successive
+-- invocations of the predicate on each byte of the input until one
+-- returns 'Nothing' or the input ends.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'Nothing' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+scan :: s -> (s -> Word8 -> Maybe s) -> Parser B.ByteString
+scan s0 p = do
+  chunks <- go [] s0
+  case chunks of
+    [x] -> return x
+    xs  -> return . B.concat . reverse $ xs
+ where
+  go acc s1 = do
+    let scanner (B.PS fp off len) =
+          withForeignPtr fp $ \ptr0 -> do
+            let start = ptr0 `plusPtr` off
+                end   = start `plusPtr` len
+                inner ptr !s
+                  | ptr < end = do
+                    w <- peek ptr
+                    case p s w of
+                      Just s' -> inner (ptr `plusPtr` 1) s'
+                      _       -> done (ptr `minusPtr` start) s
+                  | otherwise = done (ptr `minusPtr` start) s
+                done !i !s = return (T i s)
+            inner start s1
+    bs <- get
+    let T i s' = unsafePerformIO $ scanner bs
+        h = B.unsafeTake i bs
+        t = B.unsafeDrop i bs
+    put t
+    if B.null t
+      then do
+        input <- wantInput
+        if input
+          then go (h:acc) s'
+          else return (h:acc)
+      else return (h:acc)
+{-# INLINE scan #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser requires the predicate to succeed on at least one byte
+-- of input: it will fail if the predicate never returns 'True' or if
+-- there is no input left.
+takeWhile1 :: (Word8 -> Bool) -> Parser B.ByteString
+takeWhile1 p = do
+  (`when` demandInput) =<< B.null <$> get
+  (h,t) <- B8.span p <$> get
+  when (B.null h) $ fail "takeWhile1"
+  put t
+  if B.null t
+    then (h<>) `fmap` takeWhile p
+    else return h
+
+-- | Match any byte in a set.
+--
+-- >vowel = inClass "aeiou"
+--
+-- Range notation is supported.
+--
+-- >halfAlphabet = inClass "a-nA-N"
+--
+-- To add a literal @\'-\'@ to a set, place it at the beginning or end
+-- of the string.
+inClass :: String -> Word8 -> Bool
+inClass s = (`memberWord8` mySet)
+    where mySet = charClass s
+          {-# NOINLINE mySet #-}
+{-# INLINE inClass #-}
+
+-- | Match any byte not in a set.
+notInClass :: String -> Word8 -> Bool
+notInClass s = not . inClass s
+{-# INLINE notInClass #-}
+
+-- | Match any byte.
+anyWord8 :: Parser Word8
+anyWord8 = satisfy $ const True
+{-# INLINE anyWord8 #-}
+
+-- | Match a specific byte.
+word8 :: Word8 -> Parser Word8
+word8 c = satisfy (== c) <?> show c
+{-# INLINE word8 #-}
+
+-- | Match any byte except the given one.
+notWord8 :: Word8 -> Parser Word8
+notWord8 c = satisfy (/= c) <?> "not " ++ show c
+{-# INLINE notWord8 #-}
+
+-- | Match only if all input has been consumed.
+endOfInput :: Parser ()
+endOfInput = T.Parser $ \i0 a0 m0 kf ks ->
+             if B.null (unI i0)
+             then if m0 == Complete
+                  then ks i0 a0 m0 ()
+                  else let kf' i1 a1 m1 _ _ = addS i0 a0 m0 i1 a1 m1 $
+                                              \ i2 a2 m2 -> ks i2 a2 m2 ()
+                           ks' i1 a1 m1 _   = addS i0 a0 m0 i1 a1 m1 $
+                                              \ i2 a2 m2 -> kf i2 a2 m2 []
+                                                            "endOfInput"
+                       in  T.runParser demandInput i0 a0 m0 kf' ks'
+             else kf i0 a0 m0 [] "endOfInput"
+
+-- | Return an indication of whether the end of input has been
+-- reached.
+atEnd :: Parser Bool
+atEnd = not <$> wantInput
+{-# INLINE atEnd #-}
+
+-- | Match either a single newline character @\'\\n\'@, or a carriage
+-- return followed by a newline character @\"\\r\\n\"@.
+endOfLine :: Parser ()
+endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ())
+
+--- | Name the parser, in case failure occurs.
+(<?>) :: Parser a
+      -> String                 -- ^ the name to use if parsing fails
+      -> Parser a
+p <?> msg0 = T.Parser $ \i0 a0 m0 kf ks ->
+             let kf' i a m strs msg = kf i a m (msg0:strs) msg
+             in T.runParser p i0 a0 m0 kf' ks
+{-# INLINE (<?>) #-}
+infix 0 <?>
+
+-- | Terminal failure continuation.
+failK :: Failure a
+failK i0 _a0 _m0 stack msg = Fail (unI i0) stack msg
+{-# INLINE failK #-}
+
+-- | Terminal success continuation.
+successK :: Success a a
+successK i0 _a0 _m0 a = Done (unI i0) a
+{-# INLINE successK #-}
+
+-- | Run a parser.
+parse :: Parser a -> B.ByteString -> Result a
+parse m s = T.runParser m (I s) (A B.empty) Incomplete failK successK
+{-# INLINE parse #-}
+
+-- | Run a parser that cannot be resupplied via a 'Partial' result.
+parseOnly :: Parser a -> B.ByteString -> Either String a
+parseOnly m s = case T.runParser m (I s) (A B.empty) Complete failK successK of
+                  Fail _ _ err -> Left err
+                  Done _ a     -> Right a
+                  _            -> error "parseOnly: impossible error!"
+{-# INLINE parseOnly #-}

Data/Attoparsec/ByteString/Lazy.hs

+-- |
+-- Module      :  Data.Attoparsec.ByteString.Lazy
+-- Copyright   :  Bryan O'Sullivan 2010, 2011
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient combinator parsing for lazy 'ByteString'
+-- strings, loosely based on the Parsec library.
+--
+-- This is essentially the same code as in the 'Data.Attoparsec'
+-- module, only with a 'parse' function that can consume a lazy
+-- 'ByteString' incrementally, and a 'Result' type that does not allow
+-- more input to be fed in.  Think of this as suitable for use with a
+-- lazily read file, e.g. via 'L.readFile' or 'L.hGetContents'.
+--
+-- Behind the scenes, strict 'B.ByteString' values are still used
+-- internally to store parser input and manipulate it efficiently.
+-- High-performance parsers such as 'string' still expect strict
+-- 'B.ByteString' parameters.
+
+module Data.Attoparsec.ByteString.Lazy
+    (
+      Result(..)
+    , module Data.Attoparsec.ByteString
+    -- * Running parsers
+    , parse
+    , parseTest
+    -- ** Result conversion
+    , maybeResult
+    , eitherResult
+    ) where
+
+import Data.ByteString.Lazy.Internal (ByteString(..), chunk)
+import qualified Data.ByteString as B
+import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.Internal.Types as T
+import Data.Attoparsec.ByteString
+    hiding (IResult(..), Result, eitherResult, maybeResult,
+            parse, parseWith, parseTest)
+
+-- | The result of a parse.
+data Result r = Fail ByteString [String] String
+              -- ^ The parse failed.  The 'ByteString' is the input
+              -- that had not yet been consumed when the failure
+              -- occurred.  The @[@'String'@]@ is a list of contexts
+              -- in which the error occurred.  The 'String' is the
+              -- message describing the error, if any.
+              | Done ByteString r
+              -- ^ The parse succeeded.  The 'ByteString' is the
+              -- input that had not yet been consumed (if any) when
+              -- the parse succeeded.
+
+instance Show r => Show (Result r) where
+    show (Fail bs stk msg) =
+        "Fail " ++ show bs ++ " " ++ show stk ++ " " ++ show msg
+    show (Done bs r)       = "Done " ++ show bs ++ " " ++ show r
+
+fmapR :: (a -> b) -> Result a -> Result b
+fmapR _ (Fail st stk msg) = Fail st stk msg
+fmapR f (Done bs r)       = Done bs (f r)
+
+instance Functor Result where
+    fmap = fmapR
+
+-- | Run a parser and return its result.
+parse :: A.Parser a -> ByteString -> Result a
+parse p s = case s of
+              Chunk x xs -> go (A.parse p x) xs
+              empty      -> go (A.parse p B.empty) empty
+  where
+    go (T.Fail x stk msg) ys      = Fail (chunk x ys) stk msg
+    go (T.Done x r) ys            = Done (chunk x ys) r
+    go (T.Partial k) (Chunk y ys) = go (k y) ys
+    go (T.Partial k) empty        = go (k B.empty) empty
+
+-- | Run a parser and print its result to standard output.
+parseTest :: (Show a) => A.Parser a -> ByteString -> IO ()
+parseTest p s = print (parse p s)
+
+-- | Convert a 'Result' value to a 'Maybe' value.
+maybeResult :: Result r -> Maybe r
+maybeResult (Done _ r) = Just r
+maybeResult _          = Nothing
+
+-- | Convert a 'Result' value to an 'Either' value.
+eitherResult :: Result r -> Either String r
+eitherResult (Done _ r)     = Right r
+eitherResult (Fail _ _ msg) = Left msg

Data/Attoparsec/Char8.hs

-{-# LANGUAGE BangPatterns, FlexibleInstances #-}
-{-# OPTIONS_GHC -fno-warn-orphans #-}
-
 -- |
 -- Module      :  Data.Attoparsec.Char8
--- Copyright   :  Bryan O'Sullivan 2007-2010
+-- Copyright   :  Bryan O'Sullivan 2007-2011
 -- License     :  BSD3
 -- 
 -- Maintainer  :  bos@serpentine.com
 -- Portability :  unknown
 --
 -- Simple, efficient, character-oriented combinator parsing for
--- 'B.ByteString' strings, loosely based on the Parsec library.
+-- 'ByteString' strings, loosely based on the Parsec library.
 
 module Data.Attoparsec.Char8
     (
-    -- * Character encodings
-    -- $encodings
-
-    -- * Parser types
-      Parser
-    , A.Result(..)
-
-    -- * Running parsers
-    , A.parse
-    , A.feed
-    , A.parseOnly
-    , A.parseTest
-    , A.parseWith
-
-    -- ** Result conversion
-    , A.maybeResult
-    , A.eitherResult
-
-    -- * Combinators
-    , (I.<?>)
-    , I.try
-    , module Data.Attoparsec.Combinator
-
-    -- * Parsing individual characters
-    , char
-    , char8
-    , anyChar
-    , notChar
-    , satisfy
-
-    -- ** Special character parsers
-    , digit
-    , letter_iso8859_15
-    , letter_ascii
-    , space
-
-    -- ** Fast predicates
-    , isDigit
-    , isDigit_w8
-    , isAlpha_iso8859_15
-    , isAlpha_ascii
-    , isSpace
-    , isSpace_w8
-
-    -- *** Character classes
-    , inClass
-    , notInClass
-
-    -- * Efficient string handling
-    , I.string
-    , stringCI
-    , skipSpace
-    , skipWhile
-    , I.take
-    , scan
-    , takeWhile
-    , takeWhile1
-    , takeTill
-
-    -- ** Consume all remaining input
-    , I.takeByteString
-    , I.takeLazyByteString
-
-    -- * Text parsing
-    , I.endOfLine
-    , isEndOfLine
-    , isHorizontalSpace
-
-    -- * Numeric parsers
-    , decimal
-    , hexadecimal
-    , signed
-    , double
-    , Number(..)
-    , number
-    , rational
-
-    -- * State observation and manipulation functions
-    , I.endOfInput
-    , I.atEnd
+      module Data.Attoparsec.ByteString.Char8
     ) where
 
-import Control.Applicative ((*>), (<$>), (<|>))
-import Data.Attoparsec.Combinator
-import Data.Attoparsec.FastSet (charClass, memberChar)
-import Data.Attoparsec.Internal (Parser, (<?>))
-import Data.Attoparsec.Number (Number(..))
-import Data.Bits (Bits, (.|.), shiftL)
-import Data.ByteString.Internal (c2w, w2c)
-import Data.Int (Int8, Int16, Int32, Int64)
-import Data.Ratio ((%))
-import Data.String (IsString(..))
-import Data.Word (Word8, Word16, Word32, Word64, Word)
-import Prelude hiding (takeWhile)
-import qualified Data.Attoparsec as A
-import qualified Data.Attoparsec.Internal as I
-import qualified Data.ByteString as B8
-import qualified Data.ByteString.Char8 as B
-
-instance IsString (Parser B.ByteString) where
-    fromString = I.string . B.pack
-
--- $encodings
---
--- This module is intended for parsing text that is
--- represented using an 8-bit character set, e.g. ASCII or
--- ISO-8859-15.  It /does not/ make any attempt to deal with character
--- encodings, multibyte characters, or wide characters.  In
--- particular, all attempts to use characters above code point U+00FF
--- will give wrong answers.
---
--- Code points below U+0100 are simply translated to and from their
--- numeric values, so e.g. the code point U+00A4 becomes the byte
--- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
--- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
--- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.
-
--- ASCII-specific but fast, oh yes.
-toLower :: Word8 -> Word8
-toLower w | w >= 65 && w <= 90 = w + 32
-          | otherwise          = w
-
--- | Satisfy a literal string, ignoring case.
-stringCI :: B.ByteString -> Parser B.ByteString
-stringCI = I.stringTransform (B8.map toLower)
-{-# INLINE stringCI #-}
-
--- | Consume input as long as the predicate returns 'True', and return
--- the consumed input.
---
--- This parser requires the predicate to succeed on at least one byte
--- of input: it will fail if the predicate never returns 'True' or if
--- there is no input left.
-takeWhile1 :: (Char -> Bool) -> Parser B.ByteString
-takeWhile1 p = I.takeWhile1 (p . w2c)
-{-# INLINE takeWhile1 #-}
-
--- | The parser @satisfy p@ succeeds for any byte for which the
--- predicate @p@ returns 'True'. Returns the byte that is actually
--- parsed.
---
--- >digit = satisfy isDigit
--- >    where isDigit c = c >= '0' && c <= '9'
-satisfy :: (Char -> Bool) -> Parser Char
-satisfy = I.satisfyWith w2c
-{-# INLINE satisfy #-}
-
--- | Match a letter, in the ISO-8859-15 encoding.
-letter_iso8859_15 :: Parser Char
-letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15"
-{-# INLINE letter_iso8859_15 #-}
-
--- | Match a letter, in the ASCII encoding.
-letter_ascii :: Parser Char
-letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii"
-{-# INLINE letter_ascii #-}
-
--- | A fast alphabetic predicate for the ISO-8859-15 encoding
---
--- /Note/: For all character encodings other than ISO-8859-15, and
--- almost all Unicode code points above U+00A3, this predicate gives
--- /wrong answers/.
-isAlpha_iso8859_15 :: Char -> Bool
-isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
-                       (c >= '\166' && moby c)
-  where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247"
-        {-# NOINLINE moby #-}
-{-# INLINE isAlpha_iso8859_15 #-}
-
--- | A fast alphabetic predicate for the ASCII encoding
---
--- /Note/: For all character encodings other than ASCII, and
--- almost all Unicode code points above U+007F, this predicate gives
--- /wrong answers/.
-isAlpha_ascii :: Char -> Bool
-isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-{-# INLINE isAlpha_ascii #-}
-
--- | Parse a single digit.
-digit :: Parser Char
-digit = satisfy isDigit <?> "digit"
-{-# INLINE digit #-}
-
--- | A fast digit predicate.
-isDigit :: Char -> Bool
-isDigit c = c >= '0' && c <= '9'
-{-# INLINE isDigit #-}
-
--- | A fast digit predicate.
-isDigit_w8 :: Word8 -> Bool
-isDigit_w8 w = w >= 48 && w <= 57
-{-# INLINE isDigit_w8 #-}
-
--- | Match any character.
-anyChar :: Parser Char
-anyChar = satisfy $ const True
-{-# INLINE anyChar #-}
-
--- | Fast predicate for matching ASCII space characters.
---
--- /Note/: This predicate only gives correct answers for the ASCII
--- encoding.  For instance, it does not recognise U+00A0 (non-breaking
--- space) as a space character, even though it is a valid ISO-8859-15
--- byte. For a Unicode-aware and only slightly slower predicate,
--- use 'Data.Char.isSpace'
-isSpace :: Char -> Bool
-isSpace c = (c == ' ') || ('\t' <= c && c <= '\r')
-{-# INLINE isSpace #-}
-
--- | Fast 'Word8' predicate for matching ASCII space characters.
-isSpace_w8 :: Word8 -> Bool
-isSpace_w8 w = (w == 32) || (9 <= w && w <= 13)
-{-# INLINE isSpace_w8 #-}
-
-
--- | Parse a space character.
---
--- /Note/: This parser only gives correct answers for the ASCII
--- encoding.  For instance, it does not recognise U+00A0 (non-breaking
--- space) as a space character, even though it is a valid ISO-8859-15
--- byte.
-space :: Parser Char
-space = satisfy isSpace <?> "space"
-{-# INLINE space #-}
-
--- | Match a specific character.
-char :: Char -> Parser Char
-char c = satisfy (== c) <?> [c]
-{-# INLINE char #-}
-
--- | Match a specific character, but return its 'Word8' value.
-char8 :: Char -> Parser Word8
-char8 c = I.satisfy (== c2w c) <?> [c]
-{-# INLINE char8 #-}
-
--- | Match any character except the given one.
-notChar :: Char -> Parser Char
-notChar c = satisfy (/= c) <?> "not " ++ [c]
-{-# INLINE notChar #-}
-
--- | Match any character in a set.
---
--- >vowel = inClass "aeiou"
---
--- Range notation is supported.
---
--- >halfAlphabet = inClass "a-nA-N"
---
--- To add a literal \'-\' to a set, place it at the beginning or end
--- of the string.
-inClass :: String -> Char -> Bool
-inClass s = (`memberChar` mySet)
-    where mySet = charClass s
-{-# INLINE inClass #-}
-
--- | Match any character not in a set.
-notInClass :: String -> Char -> Bool
-notInClass s = not . inClass s
-{-# INLINE notInClass #-}
-
--- | Consume input as long as the predicate returns 'True', and return
--- the consumed input.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'False' on the first byte of input.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-takeWhile :: (Char -> Bool) -> Parser B.ByteString
-takeWhile p = I.takeWhile (p . w2c)
-{-# INLINE takeWhile #-}
-
--- | A stateful scanner.  The predicate consumes and transforms a
--- state argument, and each transformed state is passed to successive
--- invocations of the predicate on each byte of the input until one
--- returns 'Nothing' or the input ends.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'Nothing' on the first byte of input.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString
-scan s0 p = I.scan s0 (\s -> p s . w2c)
-{-# INLINE scan #-}
-
--- | Consume input as long as the predicate returns 'False'
--- (i.e. until it returns 'True'), and return the consumed input.
---
--- This parser does not fail.  It will return an empty string if the
--- predicate returns 'True' on the first byte of input.
---
--- /Note/: Because this parser does not fail, do not use it with
--- combinators such as 'many', because such parsers loop until a
--- failure occurs.  Careless use will thus result in an infinite loop.
-takeTill :: (Char -> Bool) -> Parser B.ByteString
-takeTill p = I.takeTill (p . w2c)
-{-# INLINE takeTill #-}
-
--- | Skip past input for as long as the predicate returns 'True'.
-skipWhile :: (Char -> Bool) -> Parser ()
-skipWhile p = I.skipWhile (p . w2c)
-{-# INLINE skipWhile #-}
-
--- | Skip over white space.
-skipSpace :: Parser ()
-skipSpace = I.skipWhile isSpace_w8
-{-# INLINE skipSpace #-}
-
--- | A predicate that matches either a carriage return @\'\\r\'@ or
--- newline @\'\\n\'@ character.
-isEndOfLine :: Word8 -> Bool
-isEndOfLine w = w == 13 || w == 10
-{-# INLINE isEndOfLine #-}
-
--- | A predicate that matches either a space @\' \'@ or horizontal tab
--- @\'\\t\'@ character.
-isHorizontalSpace :: Word8 -> Bool
-isHorizontalSpace w = w == 32 || w == 9
-{-# INLINE isHorizontalSpace #-}
-
--- | Parse and decode an unsigned hexadecimal number.  The hex digits
--- @\'a\'@ through @\'f\'@ may be upper or lower case.
---
--- This parser does not accept a leading @\"0x\"@ string.
-hexadecimal :: (Integral a, Bits a) => Parser a
-hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit
-  where
-    isHexDigit w = (w >= 48 && w <= 57) ||
-                   (w >= 97 && w <= 102) ||
-                   (w >= 65 && w <= 90)
-    step a w | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48)
-             | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87)
-             | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55)
-{-# SPECIALISE hexadecimal :: Parser Int #-}
-{-# SPECIALISE hexadecimal :: Parser Int8 #-}
-{-# SPECIALISE hexadecimal :: Parser Int16 #-}
-{-# SPECIALISE hexadecimal :: Parser Int32 #-}
-{-# SPECIALISE hexadecimal :: Parser Int64 #-}
-{-# SPECIALISE hexadecimal :: Parser Integer #-}
-{-# SPECIALISE hexadecimal :: Parser Word #-}
-{-# SPECIALISE hexadecimal :: Parser Word8 #-}
-{-# SPECIALISE hexadecimal :: Parser Word16 #-}
-{-# SPECIALISE hexadecimal :: Parser Word32 #-}
-{-# SPECIALISE hexadecimal :: Parser Word64 #-}
-
--- | Parse and decode an unsigned decimal number.
-decimal :: Integral a => Parser a
-decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig
-  where isDig w  = w >= 48 && w <= 57
-        step a w = a * 10 + fromIntegral (w - 48)
-{-# SPECIALISE decimal :: Parser Int #-}
-{-# SPECIALISE decimal :: Parser Int8 #-}
-{-# SPECIALISE decimal :: Parser Int16 #-}
-{-# SPECIALISE decimal :: Parser Int32 #-}
-{-# SPECIALISE decimal :: Parser Int64 #-}
-{-# SPECIALISE decimal :: Parser Integer #-}
-{-# SPECIALISE decimal :: Parser Word #-}
-{-# SPECIALISE decimal :: Parser Word8 #-}
-{-# SPECIALISE decimal :: Parser Word16 #-}
-{-# SPECIALISE decimal :: Parser Word32 #-}
-{-# SPECIALISE decimal :: Parser Word64 #-}
-
--- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign
--- character.
-signed :: Num a => Parser a -> Parser a
-{-# SPECIALISE signed :: Parser Int -> Parser Int #-}
-{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-}
-{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-}
-{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-}
-{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-}
-{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-}
-signed p = (negate <$> (char8 '-' *> p))
-       <|> (char8 '+' *> p)
-       <|> p
-
--- | Parse a rational number.
---
--- This parser accepts an optional leading sign character, followed by
--- at least one decimal digit.  The syntax similar to that accepted by
--- the 'read' function, with the exception that a trailing @\'.\'@ or
--- @\'e\'@ /not/ followed by a number is not consumed.
---
--- Examples with behaviour identical to 'read', if you feed an empty
--- continuation to the first result:
---
--- >rational "3"     == Done 3.0 ""
--- >rational "3.1"   == Done 3.1 ""
--- >rational "3e4"   == Done 30000.0 ""
--- >rational "3.1e4" == Done 31000.0, ""
-
--- Examples with behaviour identical to 'read':
---
--- >rational ".3"    == Fail "input does not start with a digit"
--- >rational "e3"    == Fail "input does not start with a digit"