Commits

Mario Blažević committed 3a33369

Re-integrated the Bytestring and ByteString.Char8 modules, just in case.

Comments (0)

Files changed (5)

Data/Picoparsec/ByteString.hs

+-- |
+-- Module      :  Data.Picoparsec.ByteString
+-- Copyright   :  Bryan O'Sullivan 2007-2011, Mario Blažević <blamario@yahoo.com> 2014
+-- License     :  BSD3
+--
+-- Maintainer  :  Mario Blažević
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Fast, simple parsers for 'B.ByteString' strings, carried over from Attoparsec.
+
+module Data.Picoparsec.ByteString
+    (
+    -- * Parsing individual bytes
+      I.word8
+    , I.anyWord8
+    , I.notWord8
+    , I.satisfy
+    , I.satisfyWith
+    , I.skip
+
+    -- ** Lookahead
+    , I.peekWord8
+    , I.peekWord8'
+
+    -- ** Byte classes
+    , I.inClass
+    , I.notInClass
+
+    -- * Efficient string handling
+    , I.string
+    , I.skipWhile
+    , I.take
+    , I.scan
+    , I.takeWhile
+    , I.takeWhile1
+    , I.takeTill
+    ) where
+
+import qualified Data.Picoparsec.ByteString.Internal as I

Data/Picoparsec/ByteString/Char8.hs

+{-# LANGUAGE BangPatterns, FlexibleInstances, TypeFamilies,
+    TypeSynonymInstances, GADTs #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+-- |
+-- Module      :  Data.Picoparsec.ByteString.Char8
+-- Copyright   :  Bryan O'Sullivan 2007-2011
+-- License     :  BSD3
+--
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient, character-oriented combinator parsing for
+-- 'B.ByteString' strings, loosely based on the Parsec library.
+
+module Data.Picoparsec.ByteString.Char8
+    (
+    -- * Character encodings
+    -- $encodings
+
+    -- * Parsing individual characters
+      char
+    , char8
+    , anyChar
+    , notChar
+    , satisfy
+
+    -- ** Lookahead
+    , peekChar
+    , peekChar'
+
+    -- ** Special character parsers
+    , digit
+    , letter_iso8859_15
+    , letter_ascii
+    , space
+
+    -- ** Fast predicates
+    , isDigit
+    , isDigit_w8
+    , isAlpha_iso8859_15
+    , isAlpha_ascii
+    , isSpace
+    , isSpace_w8
+
+    -- *** Character classes
+    , inClass
+    , notInClass
+
+    -- * Efficient string handling
+    , I.string
+    , stringCI
+    , skipSpace
+    , skipWhile
+    , I.take
+    , scan
+    , takeWhile
+    , takeWhile1
+    , takeTill
+
+    -- ** String combinators
+    -- $specalt
+    , (.*>)
+    , (<*.)
+
+    -- * Text parsing
+    , I.endOfLine
+    , isEndOfLine
+    , isHorizontalSpace
+
+    -- * Numeric parsers
+    , decimal
+    , hexadecimal
+    , signed
+    , double
+    , Number(..)
+    , number
+    , rational
+    , scientific
+    ) where
+
+import Control.Applicative (pure, (*>), (<*), (<$>), (<|>))
+import Data.Picoparsec.ByteString.FastSet (charClass, memberChar)
+import Data.Picoparsec.ByteString.Internal (Parser)
+import Data.Picoparsec.Combinator
+import Data.Picoparsec.Number (Number(..))
+import Data.Bits (Bits, (.|.), shiftL)
+import Data.ByteString.Internal (c2w, w2c)
+import Data.Int (Int8, Int16, Int32, Int64)
+import Data.Scientific (Scientific, coefficient, base10Exponent)
+import qualified Data.Scientific as Sci (scientific)
+import Data.Word (Word8, Word16, Word32, Word64, Word)
+import Prelude hiding (takeWhile)
+import qualified Data.Picoparsec.ByteString.Internal as I
+import qualified Data.ByteString as B8
+import qualified Data.ByteString.Char8 as B
+
+-- $encodings
+--
+-- This module is intended for parsing text that is
+-- represented using an 8-bit character set, e.g. ASCII or
+-- ISO-8859-15.  It /does not/ make any attempt to deal with character
+-- encodings, multibyte characters, or wide characters.  In
+-- particular, all attempts to use characters above code point U+00FF
+-- will give wrong answers.
+--
+-- Code points below U+0100 are simply translated to and from their
+-- numeric values, so e.g. the code point U+00A4 becomes the byte
+-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
+-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
+-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.
+
+-- ASCII-specific but fast, oh yes.
+toLower :: Word8 -> Word8
+toLower w | w >= 65 && w <= 90 = w + 32
+          | otherwise          = w
+
+-- | Satisfy a literal string, ignoring case.
+stringCI :: B.ByteString -> Parser B.ByteString
+stringCI = I.stringTransform (B8.map toLower)
+{-# INLINE stringCI #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser requires the predicate to succeed on at least one byte
+-- of input: it will fail if the predicate never returns 'True' or if
+-- there is no input left.
+takeWhile1 :: (Char -> Bool) -> Parser B.ByteString
+takeWhile1 p = I.takeWhile1 (p . w2c)
+{-# INLINE takeWhile1 #-}
+
+-- | The parser @satisfy p@ succeeds for any byte for which the
+-- predicate @p@ returns 'True'. Returns the byte that is actually
+-- parsed.
+--
+-- >digit = satisfy isDigit
+-- >    where isDigit c = c >= '0' && c <= '9'
+satisfy :: (Char -> Bool) -> Parser Char
+satisfy = I.satisfyWith w2c
+{-# INLINE satisfy #-}
+
+-- | Match a letter, in the ISO-8859-15 encoding.
+letter_iso8859_15 :: Parser Char
+letter_iso8859_15 = satisfy isAlpha_iso8859_15 <?> "letter_iso8859_15"
+{-# INLINE letter_iso8859_15 #-}
+
+-- | Match a letter, in the ASCII encoding.
+letter_ascii :: Parser Char
+letter_ascii = satisfy isAlpha_ascii <?> "letter_ascii"
+{-# INLINE letter_ascii #-}
+
+-- | A fast alphabetic predicate for the ISO-8859-15 encoding
+--
+-- /Note/: For all character encodings other than ISO-8859-15, and
+-- almost all Unicode code points above U+00A3, this predicate gives
+-- /wrong answers/.
+isAlpha_iso8859_15 :: Char -> Bool
+isAlpha_iso8859_15 c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+                       (c >= '\166' && moby c)
+  where moby = notInClass "\167\169\171-\179\182\183\185\187\191\215\247"
+        {-# NOINLINE moby #-}
+{-# INLINE isAlpha_iso8859_15 #-}
+
+-- | A fast alphabetic predicate for the ASCII encoding
+--
+-- /Note/: For all character encodings other than ASCII, and
+-- almost all Unicode code points above U+007F, this predicate gives
+-- /wrong answers/.
+isAlpha_ascii :: Char -> Bool
+isAlpha_ascii c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+{-# INLINE isAlpha_ascii #-}
+
+-- | Parse a single digit.
+digit :: Parser Char
+digit = satisfy isDigit <?> "digit"
+{-# INLINE digit #-}
+
+-- | A fast digit predicate.
+isDigit :: Char -> Bool
+isDigit c = c >= '0' && c <= '9'
+{-# INLINE isDigit #-}
+
+-- | A fast digit predicate.
+isDigit_w8 :: Word8 -> Bool
+isDigit_w8 w = w >= 48 && w <= 57
+{-# INLINE isDigit_w8 #-}
+
+-- | Match any character.
+anyChar :: Parser Char
+anyChar = satisfy $ const True
+{-# INLINE anyChar #-}
+
+-- | Match any character, to perform lookahead. Returns 'Nothing' if
+-- end of input has been reached. Does not consume any input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+peekChar :: Parser (Maybe Char)
+peekChar = (fmap w2c) `fmap` I.peekWord8
+{-# INLINE peekChar #-}
+
+-- | Match any character, to perform lookahead.  Does not consume any
+-- input, but will fail if end of input has been reached.
+peekChar' :: Parser Char
+peekChar' = w2c `fmap` I.peekWord8'
+{-# INLINE peekChar' #-}
+
+-- | Fast predicate for matching ASCII space characters.
+--
+-- /Note/: This predicate only gives correct answers for the ASCII
+-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
+-- space) as a space character, even though it is a valid ISO-8859-15
+-- byte. For a Unicode-aware and only slightly slower predicate,
+-- use 'Data.Char.isSpace'
+isSpace :: Char -> Bool
+isSpace c = (c == ' ') || ('\t' <= c && c <= '\r')
+{-# INLINE isSpace #-}
+
+-- | Fast 'Word8' predicate for matching ASCII space characters.
+isSpace_w8 :: Word8 -> Bool
+isSpace_w8 w = (w == 32) || (9 <= w && w <= 13)
+{-# INLINE isSpace_w8 #-}
+
+
+-- | Parse a space character.
+--
+-- /Note/: This parser only gives correct answers for the ASCII
+-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
+-- space) as a space character, even though it is a valid ISO-8859-15
+-- byte.
+space :: Parser Char
+space = satisfy isSpace <?> "space"
+{-# INLINE space #-}
+
+-- | Match a specific character.
+char :: Char -> Parser Char
+char c = satisfy (== c) <?> [c]
+{-# INLINE char #-}
+
+-- | Match a specific character, but return its 'Word8' value.
+char8 :: Char -> Parser Word8
+char8 c = I.satisfy (== c2w c) <?> [c]
+{-# INLINE char8 #-}
+
+-- | Match any character except the given one.
+notChar :: Char -> Parser Char
+notChar c = satisfy (/= c) <?> "not " ++ [c]
+{-# INLINE notChar #-}
+
+-- | Match any character in a set.
+--
+-- >vowel = inClass "aeiou"
+--
+-- Range notation is supported.
+--
+-- >halfAlphabet = inClass "a-nA-N"
+--
+-- To add a literal \'-\' to a set, place it at the beginning or end
+-- of the string.
+inClass :: String -> Char -> Bool
+inClass s = (`memberChar` mySet)
+    where mySet = charClass s
+{-# INLINE inClass #-}
+
+-- | Match any character not in a set.
+notInClass :: String -> Char -> Bool
+notInClass s = not . inClass s
+{-# INLINE notInClass #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'False' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeWhile :: (Char -> Bool) -> Parser B.ByteString
+takeWhile p = I.takeWhile (p . w2c)
+{-# INLINE takeWhile #-}
+
+-- | A stateful scanner.  The predicate consumes and transforms a
+-- state argument, and each transformed state is passed to successive
+-- invocations of the predicate on each byte of the input until one
+-- returns 'Nothing' or the input ends.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'Nothing' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+scan :: s -> (s -> Char -> Maybe s) -> Parser B.ByteString
+scan s0 p = I.scan s0 (\s -> p s . w2c)
+{-# INLINE scan #-}
+
+-- | Consume input as long as the predicate returns 'False'
+-- (i.e. until it returns 'True'), and return the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'True' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeTill :: (Char -> Bool) -> Parser B.ByteString
+takeTill p = I.takeTill (p . w2c)
+{-# INLINE takeTill #-}
+
+-- | Skip past input for as long as the predicate returns 'True'.
+skipWhile :: (Char -> Bool) -> Parser ()
+skipWhile p = I.skipWhile (p . w2c)
+{-# INLINE skipWhile #-}
+
+-- | Skip over white space.
+skipSpace :: Parser ()
+skipSpace = I.skipWhile isSpace_w8
+{-# INLINE skipSpace #-}
+
+-- $specalt
+--
+-- The '.*>' and '<*.' combinators are intended for use with the
+-- @OverloadedStrings@ language extension.  They simplify the common
+-- task of matching a statically known string, then immediately
+-- parsing something else.
+--
+-- An example makes this easier to understand:
+--
+-- @{-\# LANGUAGE OverloadedStrings #-}
+--
+-- shoeSize = \"Shoe size: \" '.*>' 'decimal'
+-- @
+--
+-- If we were to try to use '*>' above instead, the type checker would
+-- not be able to tell which 'IsString' instance to use for the text
+-- in quotes.  We would have to be explicit, using either a type
+-- signature or the 'I.string' parser.
+
+-- | Type-specialized version of '*>' for 'B.ByteString'.
+(.*>) :: B.ByteString -> Parser a -> Parser a
+s .*> f = I.string s *> f
+
+-- | Type-specialized version of '<*' for 'B.ByteString'.
+(<*.) :: Parser a -> B.ByteString -> Parser a
+f <*. s = f <* I.string s
+
+-- | A predicate that matches either a carriage return @\'\\r\'@ or
+-- newline @\'\\n\'@ character.
+isEndOfLine :: Word8 -> Bool
+isEndOfLine w = w == 13 || w == 10
+{-# INLINE isEndOfLine #-}
+
+-- | A predicate that matches either a space @\' \'@ or horizontal tab
+-- @\'\\t\'@ character.
+isHorizontalSpace :: Word8 -> Bool
+isHorizontalSpace w = w == 32 || w == 9
+{-# INLINE isHorizontalSpace #-}
+
+-- | Parse and decode an unsigned hexadecimal number.  The hex digits
+-- @\'a\'@ through @\'f\'@ may be upper or lower case.
+--
+-- This parser does not accept a leading @\"0x\"@ string.
+hexadecimal :: (Integral a, Bits a) => Parser a
+hexadecimal = B8.foldl' step 0 `fmap` I.takeWhile1 isHexDigit
+  where
+    isHexDigit w = (w >= 48 && w <= 57) ||
+                   (w >= 97 && w <= 102) ||
+                   (w >= 65 && w <= 70)
+    step a w | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48)
+             | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87)
+             | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55)
+{-# SPECIALISE hexadecimal :: Parser Int #-}
+{-# SPECIALISE hexadecimal :: Parser Int8 #-}
+{-# SPECIALISE hexadecimal :: Parser Int16 #-}
+{-# SPECIALISE hexadecimal :: Parser Int32 #-}
+{-# SPECIALISE hexadecimal :: Parser Int64 #-}
+{-# SPECIALISE hexadecimal :: Parser Integer #-}
+{-# SPECIALISE hexadecimal :: Parser Word #-}
+{-# SPECIALISE hexadecimal :: Parser Word8 #-}
+{-# SPECIALISE hexadecimal :: Parser Word16 #-}
+{-# SPECIALISE hexadecimal :: Parser Word32 #-}
+{-# SPECIALISE hexadecimal :: Parser Word64 #-}
+
+-- | Parse and decode an unsigned decimal number.
+decimal :: Integral a => Parser a
+decimal = B8.foldl' step 0 `fmap` I.takeWhile1 isDig
+  where isDig w  = w >= 48 && w <= 57
+        step a w = a * 10 + fromIntegral (w - 48)
+{-# SPECIALISE decimal :: Parser Int #-}
+{-# SPECIALISE decimal :: Parser Int8 #-}
+{-# SPECIALISE decimal :: Parser Int16 #-}
+{-# SPECIALISE decimal :: Parser Int32 #-}
+{-# SPECIALISE decimal :: Parser Int64 #-}
+{-# SPECIALISE decimal :: Parser Integer #-}
+{-# SPECIALISE decimal :: Parser Word #-}
+{-# SPECIALISE decimal :: Parser Word8 #-}
+{-# SPECIALISE decimal :: Parser Word16 #-}
+{-# SPECIALISE decimal :: Parser Word32 #-}
+{-# SPECIALISE decimal :: Parser Word64 #-}
+
+-- | Parse a number with an optional leading @\'+\'@ or @\'-\'@ sign
+-- character.
+signed :: Num a => Parser a -> Parser a
+{-# SPECIALISE signed :: Parser Int -> Parser Int #-}
+{-# SPECIALISE signed :: Parser Int8 -> Parser Int8 #-}
+{-# SPECIALISE signed :: Parser Int16 -> Parser Int16 #-}
+{-# SPECIALISE signed :: Parser Int32 -> Parser Int32 #-}
+{-# SPECIALISE signed :: Parser Int64 -> Parser Int64 #-}
+{-# SPECIALISE signed :: Parser Integer -> Parser Integer #-}
+signed p = (negate <$> (char8 '-' *> p))
+       <|> (char8 '+' *> p)
+       <|> p
+
+-- | Parse a rational number.
+--
+-- This parser accepts an optional leading sign character, followed by
+-- at least one decimal digit.  The syntax similar to that accepted by
+-- the 'read' function, with the exception that a trailing @\'.\'@ or
+-- @\'e\'@ /not/ followed by a number is not consumed.
+--
+-- Examples with behaviour identical to 'read', if you feed an empty
+-- continuation to the first result:
+--
+-- >rational "3"     == Done 3.0 ""
+-- >rational "3.1"   == Done 3.1 ""
+-- >rational "3e4"   == Done 30000.0 ""
+-- >rational "3.1e4" == Done 31000.0, ""
+--
+-- Examples with behaviour identical to 'read':
+--
+-- >rational ".3"    == Fail "input does not start with a digit"
+-- >rational "e3"    == Fail "input does not start with a digit"
+--
+-- Examples of differences from 'read':
+--
+-- >rational "3.foo" == Done 3.0 ".foo"
+-- >rational "3e"    == Done 3.0 "e"
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+rational :: Fractional a => Parser a
+{-# SPECIALIZE rational :: Parser Double #-}
+{-# SPECIALIZE rational :: Parser Float #-}
+{-# SPECIALIZE rational :: Parser Rational #-}
+{-# SPECIALIZE rational :: Parser Scientific #-}
+rational = scientifically realToFrac
+
+-- | Parse a rational number.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+--
+-- /Note/: This function is almost ten times faster than 'rational',
+-- but is slightly less accurate.
+--
+-- The 'Double' type supports about 16 decimal places of accuracy.
+-- For 94.2% of numbers, this function and 'rational' give identical
+-- results, but for the remaining 5.8%, this function loses precision
+-- around the 15th decimal place.  For 0.001% of numbers, this
+-- function will lose precision at the 13th or 14th decimal place.
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+double :: Parser Double
+double = rational
+
+-- | Parse a number, attempting to preserve both speed and precision.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+--
+-- /Note/: This function is almost ten times faster than 'rational'.
+-- On integral inputs, it gives perfectly accurate answers, and on
+-- floating point inputs, it is slightly less accurate than
+-- 'rational'.
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"
+number :: Parser Number
+number = scientifically $ \s ->
+            let e = base10Exponent s
+                c = coefficient s
+            in if e >= 0
+               then I (c * 10 ^ e)
+               else D (fromInteger c / 10 ^ negate e)
+
+-- | Parse a scientific number.
+--
+-- The syntax accepted by this parser is the same as for 'rational'.
+scientific :: Parser Scientific
+scientific = scientifically id
+
+{-# INLINE scientifically #-}
+scientifically :: (Scientific -> a) -> Parser a
+scientifically h = do
+  let minus = 45
+      plus  = 43
+  !positive <- ((== plus) <$> I.satisfy (\c -> c == minus || c == plus)) <|>
+               pure True
+
+  n <- decimal
+
+  let f fracDigits = Sci.scientific (B8.foldl' step n fracDigits)
+                                    (negate $ B8.length fracDigits)
+      step a w = a * 10 + fromIntegral (w - 48)
+
+  s <- let dot = 46 in
+       (I.satisfy (==dot) *> (f <$> I.takeWhile isDigit_w8)) <|>
+         pure (Sci.scientific n 0)
+
+  let !signedCoeff | positive  =          coefficient s
+                   | otherwise = negate $ coefficient s
+
+  let littleE = 101
+      bigE    = 69
+  (I.satisfy (\c -> c == littleE || c == bigE) *>
+      fmap (h . Sci.scientific signedCoeff . (base10Exponent s +)) (signed decimal)) <|>
+    return (h $ Sci.scientific signedCoeff   (base10Exponent s))

Data/Picoparsec/ByteString/Internal.hs

+{-# LANGUAGE BangPatterns, Rank2Types, OverloadedStrings,
+    RecordWildCards, MagicHash, UnboxedTuples #-}
+-- |
+-- Module      :  Data.Picoparsec.ByteString.Internal
+-- Copyright   :  Bryan O'Sullivan 2007-2011, Mario Blažević <blamario@yahoo.com> 2014
+-- License     :  BSD3
+--
+-- Maintainer  :  Mario Blažević
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient parser combinators for 'B.ByteString' strings,
+-- loosely based on the Parsec library.
+
+module Data.Picoparsec.ByteString.Internal
+    (
+      Parser,
+      
+    -- * Parsing individual bytes
+      satisfy
+    , satisfyWith
+    , anyWord8
+    , skip
+    , word8
+    , notWord8
+
+    -- ** Lookahead
+    , peekWord8
+    , peekWord8'
+
+    -- ** Byte classes
+    , inClass
+    , notInClass
+    , storable
+
+    -- * Efficient string handling
+    , skipWhile
+    , string
+    , stringTransform
+    , take
+    , scan
+    , runScanner
+    , takeWhile
+    , takeWhile1
+    , takeTill
+
+    -- * Utilities
+    , endOfLine
+    ) where
+
+import Control.Applicative ((<|>), (<$>))
+import Control.Monad (when)
+import Data.Picoparsec.ByteString.FastSet (charClass, memberWord8)
+import Data.Picoparsec.Combinator
+import Data.Picoparsec.Internal.Types
+    hiding (Parser, Input, Added, Failure, Success)
+import Data.Picoparsec.Internal
+import Data.Word (Word8)
+import Foreign.ForeignPtr (withForeignPtr)
+import Foreign.Ptr (castPtr, minusPtr, plusPtr)
+import Foreign.Storable (Storable(peek, sizeOf))
+import Prelude hiding (getChar, take, takeWhile)
+import qualified Data.Picoparsec.Internal.Types as T
+import qualified Data.ByteString as B8
+import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString.Internal as B
+import qualified Data.ByteString.Unsafe as B
+
+import GHC.Base (realWorld#)
+import GHC.IO (IO(IO))
+
+type Parser = T.Parser B.ByteString
+
+-- | The parser @satisfy p@ succeeds for any byte for which the
+-- predicate @p@ returns 'True'. Returns the byte that is actually
+-- parsed.
+--
+-- >digit = satisfy isDigit
+-- >    where isDigit w = w >= 48 && w <= 57
+satisfy :: (Word8 -> Bool) -> Parser Word8
+satisfy = satisfyElem
+{-# INLINE satisfy #-}
+
+-- | The parser @skip p@ succeeds for any byte for which the predicate
+-- @p@ returns 'True'.
+--
+-- >skipDigit = skip isDigit
+-- >    where isDigit w = w >= 48 && w <= 57
+skip :: (Word8 -> Bool) -> Parser ()
+skip p = do
+  s <- ensure 1
+  if p (B.unsafeHead s)
+    then put (B.unsafeTail s)
+    else fail "skip"
+
+-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if
+-- the predicate @p@ returns 'True' on the transformed value. The
+-- parser returns the transformed byte that was parsed.
+satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a
+satisfyWith f p = do
+  s <- ensure 1
+  let c = f $! B.unsafeHead s
+  if p c
+    then let !t = B.unsafeTail s
+         in put t >> return c
+    else fail "satisfyWith"
+{-# INLINE satisfyWith #-}
+
+storable :: Storable a => Parser a
+storable = hack undefined
+ where
+  hack :: Storable b => b -> Parser b
+  hack dummy = do
+    (fp,o,_) <- B.toForeignPtr `fmap` take (sizeOf dummy)
+    return . B.inlinePerformIO . withForeignPtr fp $ \p ->
+        peek (castPtr $ p `plusPtr` o)
+
+-- | Consume @n@ bytes of input, but succeed only if the predicate
+-- returns 'True'.
+takeWith :: Int -> (B.ByteString -> Bool) -> Parser B.ByteString
+takeWith n0 p = do
+  let n = max n0 0
+  s <- ensure n
+  let h = B.unsafeTake n s
+      t = B.unsafeDrop n s
+  if p h
+    then put t >> return h
+    else fail "takeWith"
+
+-- | Consume exactly @n@ bytes of input.
+take :: Int -> Parser B.ByteString
+take n = takeWith n (const True)
+{-# INLINE take #-}
+
+-- | @string s@ parses a sequence of bytes that identically match
+-- @s@. Returns the parsed string (i.e. @s@).  This parser consumes no
+-- input if it fails (even if a partial match).
+--
+-- /Note/: The behaviour of this parser is different to that of the
+-- similarly-named parser in Parsec, as this one is all-or-nothing.
+-- To illustrate the difference, the following parser will fail under
+-- Parsec given an input of @\"for\"@:
+--
+-- >string "foo" <|> string "for"
+--
+-- The reason for its failure is that the first branch is a
+-- partial match, and will consume the letters @\'f\'@ and @\'o\'@
+-- before failing.  In Picoparsec, the above parser will /succeed/ on
+-- that input, because the failed first branch will consume nothing.
+string :: B.ByteString -> Parser B.ByteString
+string s = takeWith (B.length s) (==s)
+{-# INLINE string #-}
+
+stringTransform :: (B.ByteString -> B.ByteString) -> B.ByteString
+                -> Parser B.ByteString
+stringTransform f s = takeWith (B.length s) ((==f s) . f)
+{-# INLINE stringTransform #-}
+
+-- | Skip past input for as long as the predicate returns 'True'.
+skipWhile :: (Word8 -> Bool) -> Parser ()
+skipWhile p = go
+ where
+  go = do
+    t <- B8.dropWhile p <$> get
+    put t
+    when (B.null t) $ do
+      input <- wantInput
+      when input go
+{-# INLINE skipWhile #-}
+
+-- | Consume input as long as the predicate returns 'False'
+-- (i.e. until it returns 'True'), and return the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'True' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeTill :: (Word8 -> Bool) -> Parser B.ByteString
+takeTill p = takeWhile (not . p)
+{-# INLINE takeTill #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'False' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+takeWhile :: (Word8 -> Bool) -> Parser B.ByteString
+takeWhile p = (B.concat . reverse) `fmap` go []
+ where
+  go acc = do
+    (h,t) <- B8.span p <$> get
+    put t
+    if B.null t
+      then do
+        input <- wantInput
+        if input
+          then go (h:acc)
+          else return (h:acc)
+      else return (h:acc)
+{-# INLINE takeWhile #-}
+
+data T s = T {-# UNPACK #-} !Int s
+
+scan_ :: (s -> [B.ByteString] -> Parser r) -> s -> (s -> Word8 -> Maybe s)
+         -> Parser r
+scan_ f s0 p = go [] s0
+ where
+  go acc s1 = do
+    let scanner (B.PS fp off len) =
+          withForeignPtr fp $ \ptr0 -> do
+            let start = ptr0 `plusPtr` off
+                end   = start `plusPtr` len
+                inner ptr !s
+                  | ptr < end = do
+                    w <- peek ptr
+                    case p s w of
+                      Just s' -> inner (ptr `plusPtr` 1) s'
+                      _       -> done (ptr `minusPtr` start) s
+                  | otherwise = done (ptr `minusPtr` start) s
+                done !i !s = return (T i s)
+            inner start s1
+    bs <- get
+    let T i s' = inlinePerformIO $ scanner bs
+        !h = B.unsafeTake i bs
+        !t = B.unsafeDrop i bs
+    put t
+    if B.null t
+      then do
+        input <- wantInput
+        if input
+          then go (h:acc) s'
+          else f s' (h:acc)
+      else f s' (h:acc)
+{-# INLINE scan_ #-}
+
+-- | A stateful scanner.  The predicate consumes and transforms a
+-- state argument, and each transformed state is passed to successive
+-- invocations of the predicate on each byte of the input until one
+-- returns 'Nothing' or the input ends.
+--
+-- This parser does not fail.  It will return an empty string if the
+-- predicate returns 'Nothing' on the first byte of input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+scan :: s -> (s -> Word8 -> Maybe s) -> Parser B.ByteString
+scan = scan_ $ \_ chunks ->
+  case chunks of
+    [x] -> return x
+    xs  -> return $! B.concat $ reverse xs
+{-# INLINE scan #-}
+
+-- | Like 'scan', but generalized to return the final state of the
+-- scanner.
+runScanner :: s -> (s -> Word8 -> Maybe s) -> Parser (B.ByteString, s)
+runScanner = scan_ $ \s xs -> return (B.concat (reverse xs), s)
+{-# INLINE runScanner #-}
+
+-- | Consume input as long as the predicate returns 'True', and return
+-- the consumed input.
+--
+-- This parser requires the predicate to succeed on at least one byte
+-- of input: it will fail if the predicate never returns 'True' or if
+-- there is no input left.
+takeWhile1 :: (Word8 -> Bool) -> Parser B.ByteString
+takeWhile1 p = do
+  (`when` demandInput) =<< B.null <$> get
+  (h,t) <- B8.span p <$> get
+  when (B.null h) $ fail "takeWhile1"
+  put t
+  if B.null t
+    then (h<>) `fmap` takeWhile p
+    else return h
+
+-- | Match any byte in a set.
+--
+-- >vowel = inClass "aeiou"
+--
+-- Range notation is supported.
+--
+-- >halfAlphabet = inClass "a-nA-N"
+--
+-- To add a literal @\'-\'@ to a set, place it at the beginning or end
+-- of the string.
+inClass :: String -> Word8 -> Bool
+inClass s = (`memberWord8` mySet)
+    where mySet = charClass s
+          {-# NOINLINE mySet #-}
+{-# INLINE inClass #-}
+
+-- | Match any byte not in a set.
+notInClass :: String -> Word8 -> Bool
+notInClass s = not . inClass s
+{-# INLINE notInClass #-}
+
+-- | Match any byte.
+anyWord8 :: Parser Word8
+anyWord8 = satisfy $ const True
+{-# INLINE anyWord8 #-}
+
+-- | Match a specific byte.
+word8 :: Word8 -> Parser Word8
+word8 c = satisfy (== c) <?> show c
+{-# INLINE word8 #-}
+
+-- | Match any byte except the given one.
+notWord8 :: Word8 -> Parser Word8
+notWord8 c = satisfy (/= c) <?> "not " ++ show c
+{-# INLINE notWord8 #-}
+
+-- | Match any byte, to perform lookahead. Returns 'Nothing' if end of
+-- input has been reached. Does not consume any input.
+--
+-- /Note/: Because this parser does not fail, do not use it with
+-- combinators such as 'many', because such parsers loop until a
+-- failure occurs.  Careless use will thus result in an infinite loop.
+peekWord8 :: Parser (Maybe Word8)
+peekWord8 = T.Parser $ \i0 a0 m0 _kf ks ->
+            if B.null (unI i0)
+            then if m0 == Complete
+                 then ks i0 a0 m0 Nothing
+                 else let ks' i a m = let !w = B.unsafeHead (unI i)
+                                      in ks i a m (Just w)
+                          kf' i a m = ks i a m Nothing
+                      in prompt i0 a0 m0 kf' ks'
+            else let !w = B.unsafeHead (unI i0)
+                 in ks i0 a0 m0 (Just w)
+{-# INLINE peekWord8 #-}
+
+-- | Match any byte, to perform lookahead.  Does not consume any
+-- input, but will fail if end of input has been reached.
+peekWord8' :: Parser Word8
+peekWord8' = do
+  s <- ensure 1
+  return $! B.unsafeHead s
+{-# INLINE peekWord8' #-}
+
+-- | Match either a single newline character @\'\\n\'@, or a carriage
+-- return followed by a newline character @\"\\r\\n\"@.
+endOfLine :: Parser ()
+endOfLine = (word8 10 >> return ()) <|> (string "\r\n" >> return ())
+
+-- | Just like unsafePerformIO, but we inline it. Big performance gains as
+-- it exposes lots of things to further inlining. /Very unsafe/. In
+-- particular, you should do no memory allocation inside an
+-- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+{-# INLINE inlinePerformIO #-}

benchmarks/Benchmarks.hs

 import qualified Data.Attoparsec.Text as AT
 import qualified Data.Attoparsec.Text.Lazy as ATL
 import qualified Data.Picoparsec as AM
+import qualified Data.Picoparsec.ByteString.Char8 as PC
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Char8 as BC
 import qualified Data.ByteString.Lazy as BL
          , bench "isAlpha" $ nf (AB.parse (many (AC.satisfy AC.isAlpha_ascii))) b
          ]
        , bgroup "picoparsec" [
-           -- bench "letter" $ nf (AM.parse (many AM.letter_ascii)) utf8b
-           bench "isAlpha" $ nf (AM.parse (many (AM.satisfyChar AC.isAlpha_ascii))) utf8b
+           bench "letter" $ nf (AM.parse (many PC.letter_ascii)) b
+         , bench "isAlpha" $ nf (AM.parse (many (AM.satisfyChar isAlpha))) utf8b
          ]
        ]
      ]
        bench "attoparsec" $ nf (AB.parse word32LE) b
      , bench "picoparsec" $ nf (AM.parse word32LE') b
      ]
-   , bench "word32LE" $ nf (AB.parse word32LE) b
    , bgroup "scan" [
        bench "short" $ nf (AB.parse quotedString) (BC.pack "abcdefghijk\"")
      , bench "long" $ nf (AB.parse quotedString) b
       text < 1.1
 
   exposed-modules: Data.Picoparsec
+                   Data.Picoparsec.ByteString
+                   Data.Picoparsec.ByteString.Char8
                    Data.Picoparsec.Combinator
                    Data.Picoparsec.Monoid
                    Data.Picoparsec.Number
                    Data.Picoparsec.Types
                    Data.Picoparsec.Zepto
   other-modules:   Data.Picoparsec.ByteString.FastSet
+                   Data.Picoparsec.ByteString.Internal
                    Data.Picoparsec.Monoid.Internal
                    Data.Picoparsec.Internal
                    Data.Picoparsec.Internal.Types