Commits

Bryan O'Sullivan committed 6f227c4

WIP

Comments (0)

Files changed (8)

 author:          Bryan O'Sullivan <bos@serpentine.com>
 maintainer:      Bryan O'Sullivan <bos@serpentine.com>
 stability:       experimental
+tested-with:     GHC == 6.8.3, GHC == 6.10.1
 synopsis:        Fast combinator parsing with Data.ByteString.Lazy
 description:     Fast combinator parsing with Data.ByteString.Lazy
 cabal-version:   >= 1.2
 build-type:      Simple
 description:     Fast, flexible text-oriented parsing of lazy ByteStrings.
+extra-source-files:
+                 src/Data/ParserCombinators/Attoparsec/Char8Boilerplate.h
+                 src/Data/ParserCombinators/Attoparsec/Word8Boilerplate.h
 
 flag split-base
 flag applicative-in-base
   exposed-modules: Data.ParserCombinators.Attoparsec
                    Data.ParserCombinators.Attoparsec.Char8
                    Data.ParserCombinators.Attoparsec.Incremental
+                   Data.ParserCombinators.Attoparsec.Incremental.Char8
                    Data.ParserCombinators.Attoparsec.FastSet
                    Data.ParserCombinators.Attoparsec.Internal
   hs-source-dirs:  src

src/Data/ParserCombinators/Attoparsec.hs

 
     -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
     , try
-    , many
     , many1
     , manyTill
     , eof

src/Data/ParserCombinators/Attoparsec/Char8.hs

+{-# LANGUAGE CPP #-}
 -----------------------------------------------------------------------------
 -- |
 -- Module      :  Data.ParserCombinators.Attoparsec.Char8
 
     -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
     , try
-    , many
     , many1
     , manyTill
     , eof
     (Parser, ParseError, (<?>), parse, parseAt, parseTest, try, manyTill, eof,
      skipMany, skipMany1, count, lookAhead, peek, sepBy, sepBy1, string,
      eitherP, getInput, getConsumed, takeAll, takeCount, notEmpty, match,
-     endOfLine, setInput, many, many1)
+     endOfLine, setInput, many1)
 import Data.ByteString.Lex.Lazy.Double (readDouble)
 import Prelude hiding (takeWhile)
 
--- | Character parser.
-satisfy :: (Char -> Bool) -> Parser Char
-satisfy p = w2c <$> I.satisfy (p . w2c)
-{-# INLINE satisfy #-}
-
-letter :: Parser Char
-letter = satisfy isLetter <?> "letter"
-{-# INLINE letter #-}
-
-digit :: Parser Char
-digit = satisfy isDigit <?> "digit"
-{-# INLINE digit #-}
-
-anyChar :: Parser Char
-anyChar = satisfy $ const True
-{-# INLINE anyChar #-}
-
-space :: Parser Char
-space = satisfy isSpace <?> "space"
-{-# INLINE space #-}
-
--- | Satisfy a specific character.
-char :: Char -> Parser Char
-char c = satisfy (== c) <?> [c]
-{-# INLINE char #-}
-
--- | Satisfy a specific character.
-notChar :: Char -> Parser Char
-notChar c = satisfy (/= c) <?> "not " ++ [c]
-{-# INLINE notChar #-}
-
-charClass :: String -> FastSet
-charClass = set . SB.pack . go
-    where go (a:'-':b:xs) = [a..b] ++ go xs
-          go (x:xs) = x : go xs
-          go _ = ""
-
-inClass :: String -> Char -> Bool
-inClass s = (`memberChar` myset)
-    where myset = charClass s
-{-# INLINE inClass #-}
-
-notInClass :: String -> Char -> Bool
-notInClass s = not . inClass s
-{-# INLINE notInClass #-}
-
 -- | Satisfy a literal string, ignoring case.
 stringCI :: LB.ByteString -> Parser LB.ByteString
 stringCI = I.stringTransform (LB.map toLower)
 {-# INLINE stringCI #-}
 
--- | Consume characters while the predicate is true.
-takeWhile :: (Char -> Bool) -> Parser LB.ByteString
-takeWhile p = I.takeWhile (p . w2c)
-{-# INLINE takeWhile #-}
-
-takeTill :: (Char -> Bool) -> Parser LB.ByteString
-takeTill p = I.takeTill (p . w2c)
-{-# INLINE takeTill #-}
-
 takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString
 takeWhile1 p = I.takeWhile1 (p . w2c)
 {-# INLINE takeWhile1 #-}
 
--- | Skip over characters while the predicate is true.
-skipWhile :: (Char -> Bool) -> Parser ()
-skipWhile p = I.skipWhile (p . w2c)
-{-# INLINE skipWhile #-}
-
--- | Skip over white space.
-skipSpace :: Parser ()
-skipSpace = takeWhile isSpace >> return ()
-{-# INLINE skipSpace #-}
-
 numeric :: String -> (LB.ByteString -> Maybe (a,LB.ByteString)) -> Parser a
 numeric desc f = do
   s <- getInput
 -- | Parse a Double.  The position counter is not updated.
 double :: Parser Double
 double = numeric "Double" readDouble
+
+#define PARSER Parser
+#include "Char8Boilerplate.h"

src/Data/ParserCombinators/Attoparsec/Char8Boilerplate.h

+-- -*- haskell -*-
+
+-- | Character parser.
+satisfy :: (Char -> Bool) -> PARSER Char
+satisfy p = w2c <$> I.satisfy (p . w2c)
+{-# INLINE satisfy #-}
+
+letter :: PARSER Char
+letter = satisfy isLetter <?> "letter"
+{-# INLINE letter #-}
+
+digit :: PARSER Char
+digit = satisfy isDigit <?> "digit"
+{-# INLINE digit #-}
+
+anyChar :: PARSER Char
+anyChar = satisfy $ const True
+{-# INLINE anyChar #-}
+
+space :: PARSER Char
+space = satisfy isSpace <?> "space"
+{-# INLINE space #-}
+
+-- | Satisfy a specific character.
+char :: Char -> PARSER Char
+char c = satisfy (== c) <?> [c]
+{-# INLINE char #-}
+
+-- | Satisfy a specific character.
+notChar :: Char -> PARSER Char
+notChar c = satisfy (/= c) <?> "not " ++ [c]
+{-# INLINE notChar #-}
+
+charClass :: String -> FastSet
+charClass = set . SB.pack . go
+    where go (a:'-':b:xs) = [a..b] ++ go xs
+          go (x:xs) = x : go xs
+          go _ = ""
+
+inClass :: String -> Char -> Bool
+inClass s = (`memberChar` myset)
+    where myset = charClass s
+{-# INLINE inClass #-}
+
+notInClass :: String -> Char -> Bool
+notInClass s = not . inClass s
+{-# INLINE notInClass #-}
+
+-- | Consume characters while the predicate is true.
+takeWhile :: (Char -> Bool) -> PARSER LB.ByteString
+takeWhile p = I.takeWhile (p . w2c)
+{-# INLINE takeWhile #-}
+
+takeTill :: (Char -> Bool) -> PARSER LB.ByteString
+takeTill p = I.takeTill (p . w2c)
+{-# INLINE takeTill #-}
+
+-- | Skip over characters while the predicate is true.
+skipWhile :: (Char -> Bool) -> PARSER ()
+skipWhile p = I.skipWhile (p . w2c)
+{-# INLINE skipWhile #-}
+
+-- | Skip over white space.
+skipSpace :: PARSER ()
+skipSpace = takeWhile isSpace >> return ()
+{-# INLINE skipSpace #-}

src/Data/ParserCombinators/Attoparsec/Incremental.hs

+{-# LANGUAGE BangPatterns, CPP #-}
 -----------------------------------------------------------------------------
 -- |
 -- Module      :  Data.ParserCombinators.Attoparsec.Incremental
 -- Simple, efficient parser combinators for lazy 'LB.ByteString'
 -- strings, loosely based on 'Text.ParserCombinators.Parsec'.
 --
--- Heavily influenced by Adam Langley's incremental parser in the
--- binary-strict package.
+-- This module is heavily influenced by Adam Langley's incremental
+-- parser in his binary-strict package.
 -- 
 -----------------------------------------------------------------------------
 module Data.ParserCombinators.Attoparsec.Incremental
     , Result(..)
     , parse
 
+    , (<?>)
     , takeWhile
     , takeTill
+    , takeCount
     , string
+    , satisfy
+    , pushBack
+
+    , word8
+    , notWord8
+    , anyWord8
+
+    , many1
+    , skipWhile
+    , skipMany
+    , skipMany1
+    , sepBy
+    , sepBy1
+    , manyTill
+    , count
 
     , yield
     ) where
 
-import Control.Applicative (Alternative(..), Applicative(..))
+import Control.Applicative
 import Control.Monad (MonadPlus(..), ap)
 import Data.ParserCombinators.Attoparsec.Internal ((+:))
 import Data.Word (Word8)
 instance Functor (Parser r) where
     fmap f m = Parser $ \s cont -> unParser m s (cont . f)
 
-instance MonadPlus (Parser r) where
-    mzero = zero
-    mplus = plus
+infix 0 <?>
 
-instance Applicative (Parser r) where
-    pure = return
-    (<*>) = ap
-
-instance Alternative (Parser r) where
-    empty = zero
-    (<|>) = plus
+-- | Name the parser.
+(<?>) :: Parser r a -> String -> Parser r a
+{-# INLINE (<?>) #-}
+p <?> msg =
+  Parser $ \st k ->
+    case unParser p st k of
+      IFailed st' _ -> IFailed st' msg
+      ok -> ok
 
 initState :: S.ByteString -> S
 initState input = S input L.empty [] 0
          -> Parser r L.ByteString
 takeWith splitf =
   Parser $ \(S sb lb adds failDepth) k ->
-    let bs = sb +: lb
-        (left,rest) = splitf bs
-    in case rest of
-         L.Empty -> IPartial $ \s ->
-                    let s' = S s L.empty (addX s adds) failDepth
-                        k' a = k (appL left a)
-                    in unParser (takeWith splitf) s' k'
-         L.Chunk h t -> k left (S h t adds failDepth)
-
-string :: L.ByteString -> Parser r L.ByteString
-string s = string' s
- where
-  string' r =
-   Parser $ \st@(S sb lb adds failDepth) k ->
-     let bs = sb +: lb
-         l = L.length r
-     in case L.splitAt l bs of
-          (h,t)
-            | h == r -> k s (mkState t adds failDepth)
-          (h,L.Empty)
-            | h `L.isPrefixOf` r ->
-                IPartial $ \s' ->
-                let st' = S s' L.empty (addX s' adds) failDepth
-                    k' a = k (appL h a)
-                in unParser (string (L.drop (L.length h) r)) st' k'
-          _ -> IFailed st "string failed to match"
-
+  let (left,rest) = splitf (sb +: lb)
+  in case rest of
+       L.Empty -> IPartial $ \s ->
+                  let s' = S s L.empty (addX s adds) failDepth
+                      k' a = k (appL left a)
+                  in unParser (takeWith splitf) s' k'
+       L.Chunk h t -> k left (S h t adds failDepth)
+    
 takeWhile :: (Word8 -> Bool) -> Parser r L.ByteString
 takeWhile = takeWith . L.span
 
 takeTill :: (Word8 -> Bool) -> Parser r L.ByteString
 takeTill = takeWith . L.break
 
+takeCount :: Int -> Parser r L.ByteString
+takeCount = tc . fromIntegral where
+ tc (!n) = Parser $ \(S sb lb adds failDepth) k ->
+           let (h,t) = L.splitAt n (sb +: lb)
+               l = L.length h
+           in if L.length h == n
+              then k h (mkState t adds failDepth)
+              else IPartial $ \s ->
+                   let st = S s L.empty (addX s adds) failDepth
+                       k' a = k (appL h a)
+                   in unParser (tc (n - l)) st k'
+
+string :: L.ByteString -> Parser r L.ByteString
+string s =
+  Parser $ \st@(S sb lb adds failDepth) k ->
+    case L.splitAt (L.length s) (sb +: lb) of
+      (h,t)
+        | h == s -> k s (mkState t adds failDepth)
+      (h,L.Empty)
+        | h `L.isPrefixOf` s ->
+            IPartial $ \s' ->
+            let st'  = S s' L.empty (addX s' adds) failDepth
+                k' a = k (appL h a)
+                r'   = L.drop (L.length h) s
+            in unParser (string r') st' k'
+      _ -> IFailed st "string failed to match"
+
+satisfy :: (Word8 -> Bool) -> Parser r Word8
+satisfy p =
+  Parser $ \st@(S sb lb adds failDepth) k ->
+    case L.uncons (sb +: lb) of
+      Just (w, lb') | p w -> k w (mkState lb' adds failDepth)
+                    | otherwise -> IFailed st "failed to match"
+      Nothing -> IPartial $ \s ->
+                 let st' = S s L.empty (addX s adds) failDepth
+                 in unParser (satisfy p) st' k
+
+pushBack :: L.ByteString -> Parser r ()
+pushBack bs =
+    Parser $ \(S sb lb adds failDepth) k ->
+        k () (mkState (bs `appL` (sb +: lb)) adds failDepth)
+
 toplevelTranslate :: IResult a -> Result a
 toplevelTranslate (IFailed _ err) = Failed err
 toplevelTranslate (IDone (S sb lb _ _) value) = Done (sb +: lb) value
 parse :: Parser r r -> S.ByteString -> Result r
 parse m input =
   toplevelTranslate $ unParser m (initState input) terminalContinuation
+
+#define PARSER Parser r
+#include "Word8Boilerplate.h"

src/Data/ParserCombinators/Attoparsec/Incremental/Char8.hs

+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.ParserCombinators.Attoparsec.Incremental.Char8
+-- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- Simple, efficient parser combinators for lazy 'LB.ByteString'
+-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
+-- 
+-----------------------------------------------------------------------------
+module Data.ParserCombinators.Attoparsec.Incremental.Char8
+    (
+    -- * Parser
+      Parser
+    , Result(..)
+
+    -- * Running parsers
+    , parse
+
+    -- * Combinators
+    , (<?>)
+
+    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
+    , many1
+    , manyTill
+    , skipMany
+    , skipMany1
+    , count
+    , sepBy
+    , sepBy1
+    , pushBack
+
+    -- * Things like in @Parsec.Char@
+    , satisfy
+    , letter
+    , digit
+    , anyChar
+    , space
+    , char
+    , notChar
+    , string
+
+    -- * Numeric parsers.
+    , int
+    , integer
+    , double
+
+    -- * Miscellaneous functions.
+    , takeWhile
+    , takeTill
+    , takeCount
+    , skipWhile
+    , skipSpace
+    , inClass
+    , notInClass
+    ) where
+
+import Control.Applicative ((<$>))
+import qualified Data.ByteString.Char8 as SB
+import qualified Data.ByteString.Lazy.Char8 as LB
+import Data.ByteString.Internal (w2c)
+import Data.Char (isDigit, isLetter, isSpace)
+import Data.ParserCombinators.Attoparsec.FastSet
+    (FastSet, memberChar, set)
+import qualified Data.ParserCombinators.Attoparsec.Incremental as I
+import Data.ParserCombinators.Attoparsec.Incremental
+    (Parser, Result(..), (<?>), parse, manyTill, pushBack,
+     skipMany, skipMany1, count, sepBy, sepBy1, string,
+     takeCount, many1)
+import Data.ByteString.Lex.Lazy.Double (readDouble)
+import Prelude hiding (takeWhile)
+
+numeric :: String -> (Char -> Bool)
+         -> (LB.ByteString -> Maybe (a,LB.ByteString)) -> Parser r a
+numeric desc p f = do
+  s <- takeWhile p
+  case f s of
+    Nothing -> pushBack s >> fail desc
+    Just (i,s') -> pushBack s' >> return i
+                   
+isIntegral :: Char -> Bool
+isIntegral c = isDigit c || c == '-'
+
+-- | Parse an integer.  The position counter is not updated.
+int :: Parser r Int
+int = numeric "Int" isIntegral LB.readInt
+
+-- | Parse an integer.  The position counter is not updated.
+integer :: Parser r Integer
+integer = numeric "Integer" isIntegral LB.readInteger
+
+-- | Parse a Double.  The position counter is not updated.
+double :: Parser r Double
+double = numeric "Double" isDouble readDouble
+    where isDouble c = isIntegral c || c == 'e' || c == '+'
+
+#define PARSER Parser r
+#include "../Char8Boilerplate.h"

src/Data/ParserCombinators/Attoparsec/Internal.hs

+{-# LANGUAGE CPP #-}
 -----------------------------------------------------------------------------
 -- |
 -- Module      :  Data.ParserCombinators.Attoparsec.Internal
 
     -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
     , try
-    , many
     , many1
     , manyTill
     , eof
     , (+:)
     ) where
 
-import Control.Applicative (Alternative(..), Applicative(..), (<$>))
-import Control.Monad (MonadPlus(..), ap, liftM2)
+import Control.Applicative
+import Control.Monad (MonadPlus(..), ap)
 import Control.Monad.Fix (MonadFix(..))
 import qualified Data.ByteString as SB
 import qualified Data.ByteString.Lazy as LB
           ok -> ok
 {-# INLINE plus #-}
 
-instance MonadPlus Parser where
-    mzero = zero
-    mplus = plus
-
-instance Applicative Parser where
-    pure = return
-    (<*>) = ap
-
-instance Alternative Parser where
-    empty = zero
-    (<|>) = plus
-
 mkState :: LB.ByteString -> Int64 -> S
 mkState s = case s of
               LB.Empty -> S SB.empty s
              Nothing -> unParser (nextChunk >> satisfy p) s
 {-# INLINE satisfy #-}
 
-anyWord8 :: Parser Word8
-anyWord8 = satisfy $ const True
-{-# INLINE anyWord8 #-}
-
--- | Satisfy a specific character.
-word8 :: Word8 -> Parser Word8
-word8 c = satisfy (== c) <?> show c
-{-# INLINE word8 #-}
-
--- | Satisfy a specific character.
-notWord8 :: Word8 -> Parser Word8
-notWord8 c = satisfy (/= c) <?> "not " ++ show c
-{-# INLINE notWord8 #-}
-
-sepBy :: Parser a -> Parser s -> Parser [a]
-sepBy p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) <|> return []
-
-sepBy1 :: Parser a -> Parser s -> Parser [a]
-sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return [])
-
 -- | Satisfy a literal string.
 string :: LB.ByteString -> Parser LB.ByteString
 string s = Parser $ \(S sb lb n) ->
     where fs = f s
 {-# INLINE stringTransform #-}
 
--- | Apply the given parser repeatedly, returning every parse result.
-count :: Int -> Parser a -> Parser [a]
-count n p = sequence (replicate n p)
-{-# INLINE count #-}
-
 try :: Parser a -> Parser a
 try p = Parser $ \s@(S sb lb _) ->
         case unParser p s of
           let bs = sb +: lb
           in Right (bs, mkState LB.empty (n + LB.length bs))
 
-takeCount :: Int64 -> Parser LB.ByteString
+takeCount :: Int -> Parser LB.ByteString
 takeCount k =
   Parser $ \(S sb lb n) ->
       let bs = sb +: lb
-          (h,t) = LB.splitAt k bs
-      in if LB.length h == k
-         then Right (h, mkState t (n + k))
+          k' = fromIntegral k
+          (h,t) = LB.splitAt k' bs
+      in if LB.length h == k'
+         then Right (h, mkState t (n + k'))
          else Left (bs, [show k ++ " bytes"])
 
 -- | Consume characters while the predicate is true.
             | otherwise -> Right (h, mkState t (n + LB.length h))
 {-# INLINE takeWhile1 #-}
 
--- | Skip over characters while the predicate is true.
-skipWhile :: (Word8 -> Bool) -> Parser ()
-skipWhile p = takeWhile p >> return ()
-{-# INLINE skipWhile #-}
-
-manyTill :: Parser a -> Parser b -> Parser [a]
-manyTill p end = scan
-    where scan = (end >> return []) <|> liftM2 (:) p scan
-
-many :: Parser a -> Parser [a]
-many p = ((:) <$> p <*> many p) <|> return []
-
-many1 :: Parser a -> Parser [a]
-many1 p = (:) <$> p <*> many p
-
--- |'skipMany' - skip zero or many instances of the parser
-skipMany :: Parser a -> Parser ()
-skipMany p = scan
-    where scan = (p >> scan) <|> return ()
-
--- |'skipMany1' - skip one or many instances of the parser       
-skipMany1 :: Parser  a -> Parser ()
-skipMany1 p = p >> skipMany p
-
 -- | Test that a parser returned a non-null ByteString.
 notEmpty :: Parser LB.ByteString -> Parser LB.ByteString 
 notEmpty p = Parser $ \s ->
     case parse p s of
       (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st
       (_, Right r) -> print r
+
+#define PARSER Parser
+#include "Word8Boilerplate.h"

src/Data/ParserCombinators/Attoparsec/Word8Boilerplate.h

+-- -*- haskell -*-
+-- This file is intended to be #included by other source files.
+
+instance MonadPlus (PARSER) where
+    mzero = zero
+    mplus = plus
+
+instance Applicative (PARSER) where
+    pure = return
+    (<*>) = ap
+
+instance Alternative (PARSER) where
+    empty = zero
+    (<|>) = plus
+
+-- | Skip over characters while the predicate is true.
+skipWhile :: (Word8 -> Bool) -> PARSER ()
+skipWhile p = takeWhile p *> pure ()
+{-# INLINE skipWhile #-}
+
+many1 :: PARSER a -> PARSER [a]
+many1 p = liftA2 (:) p (many p)
+
+sepBy :: PARSER a -> PARSER s -> PARSER [a]
+sepBy p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure []) <|> pure []
+
+sepBy1 :: PARSER a -> PARSER s -> PARSER [a]
+sepBy1 p s = liftA2 (:) p ((s *> sepBy1 p s) <|> pure [])
+
+manyTill :: PARSER a -> PARSER b -> PARSER [a]
+manyTill p end = scan
+    where scan = (end *> pure []) <|> liftA2 (:) p scan
+
+-- | Skip zero or more instances of the parser.
+skipMany :: PARSER a -> PARSER ()
+skipMany p = scan
+    where scan = (p >> scan) <|> return ()
+
+-- | Skip one or more instances of the parser.
+skipMany1 :: PARSER a -> PARSER ()
+skipMany1 p = p >> skipMany p
+
+-- | Apply the given parser repeatedly, returning every parse result.
+count :: Int -> PARSER a -> PARSER [a]
+count n p = sequence (replicate n p)
+{-# INLINE count #-}
+
+anyWord8 :: PARSER Word8
+anyWord8 = satisfy $ const True
+{-# INLINE anyWord8 #-}
+
+-- | Satisfy a specific character.
+word8 :: Word8 -> PARSER Word8
+word8 c = satisfy (== c) <?> show c
+{-# INLINE word8 #-}
+
+-- | Satisfy a specific character.
+notWord8 :: Word8 -> PARSER Word8
+notWord8 c = satisfy (/= c) <?> "not " ++ show c
+{-# INLINE notWord8 #-}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.