Bryan O'Sullivan avatar Bryan O'Sullivan committed 187e687

Switch to a more efficient representation, and simplify the API.

Comments (0)

Files changed (1)

src/Text/ParserCombinators/ByteStringParser.hs

 -- Stability   :  experimental
 -- Portability :  unknown
 --
--- Simple, efficient parser combinators for lazy 'C.ByteString'
--- values, loosely based on 'Text.ParserCombinators.Parsec'.
+-- Simple, efficient parser combinators for lazy 'LB.ByteString'
+-- strings, loosely based on 'Text.ParserCombinators.Parsec'.
 -- 
 -----------------------------------------------------------------------------
 module Text.ParserCombinators.ByteStringParser
 
     -- * Combinators
     , succeed
-    , (<|>)
     , (<?>)
 
     -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
     , try
-    , many
-    , many1
     , manyTill
     , eof
     , skipMany
     , notChar
     , string
     , stringCI
-    , byteString
-    , byteStringCI
 
     -- * Parser converters.
-    , maybeP
     , eitherP
 
     -- * Miscellaneous functions.
     , notInClass
     ) where
 
-import Control.Applicative (Applicative(..), (<$>), (<*))
+import Control.Applicative (Alternative(..), Applicative(..), (<$>), (<*), (*>))
 import Control.Monad (MonadPlus(..), ap, liftM2)
+import qualified Data.ByteString.Char8 as SB
 import qualified Data.ByteString.Lazy.Char8 as LB
+import qualified Data.ByteString.Lazy.Internal as LB
 import Data.Char (isDigit, isLetter, isSpace, toLower)
 import Data.Int (Int64)
 import qualified Data.Set as S
 
 -- * Parser Monad
 
-data S = S LB.ByteString
+data S = S {-# UNPACK #-} !SB.ByteString
+           LB.ByteString
            {-# UNPACK #-} !Int64
 
+mkState :: LB.ByteString -> Int64 -> S
+mkState s = case s of
+              LB.Empty -> S SB.empty s
+              LB.Chunk x xs -> S x xs
+
 newtype Parser a = Parser {
       unParser :: S -> Either (LB.ByteString, [String]) (a, S)
     }
               Right (a, s') -> Right (f a, s')
               Left err -> Left err
 
+(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString
+sb +: lb | SB.null sb = lb
+         | otherwise = LB.Chunk sb lb
+{-# INLINE (+:) #-}
+
 instance Monad Parser where
     return a = Parser $ \s -> Right (a, s)
     m >>= f = Parser $ \s ->
               case unParser m s of
                 Right (a, s') -> unParser (f a) s'
                 Left (s', msgs) -> Left (s', msgs)
-    fail err = Parser $ \(S bs _) -> Left (bs, [err])
+    fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err])
+
+zero :: Parser a
+zero = Parser $ \(S sb lb _) -> Left (sb +: lb, [])
+{-# INLINE zero #-}
+
+plus :: Parser a -> Parser a -> Parser a
+plus p1 p2 =
+    Parser $ \s@(S sb lb _) ->
+        case unParser p1 s of
+          Left (_, msgs1) -> 
+              case unParser p2 s of
+                Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2))
+                ok -> ok
+          ok -> ok
+{-# INLINE plus #-}
 
 instance MonadPlus Parser where
-    mzero = Parser $ \(S bs _) -> Left (bs, [])
-    Parser p1 `mplus` Parser p2 =
-        Parser $ \s@(S bs _) -> case p1 s of
-                         Left (_, msgs1) -> 
-                            case p2 s of
-                              Left (_, msgs2) -> Left (bs, (msgs1 ++ msgs2))
-                              ok -> ok
-                         ok -> ok
+    mzero = zero
+    mplus = plus
 
 #ifdef APPLICATIVE_IN_BASE
 instance Applicative Parser where
     pure = return
     (<*>) = ap
+
+instance Alternative Parser where
+    empty = zero
+    (<|>) = plus
 #endif
 
 -- | Always succeed.
 succeed = return
 
 infix 0 <?>
-infixr 1 <|>
-
--- | Choice.
-(<|>) :: Parser a -> Parser a -> Parser a
-(<|>) = mplus
-{-# INLINE (<|>) #-}
 
 -- | Name the parser.
 (<?>) :: Parser a -> String -> Parser a
 p <?> msg =
-    Parser $ \s@(S bs _) ->
+    Parser $ \s@(S sb lb _) ->
         case unParser p s of
-          (Left _) -> Left (bs, [msg])
+          (Left _) -> Left (sb +: lb, [msg])
           ok -> ok
 {-# INLINE (<?>) #-}
 
+nextChunk :: Parser ()
+nextChunk = Parser $ \(S _ lb n) ->
+            case lb of
+              LB.Chunk sb' lb' -> Right ((), S sb' lb' n)
+              LB.Empty -> Left (lb, [])
+
 -- | Get remaining input.
 getInput :: Parser LB.ByteString
-getInput = Parser $ \s@(S bs _) -> Right (bs, s)
+getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s)
 
 -- | Get number of bytes consumed so far.
 getConsumed :: Parser Int64
-getConsumed = Parser $ \s@(S _ n) -> Right (n, s)
+getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s)
 
 -- | Character parser.
 satisfy :: (Char -> Bool) -> Parser Char
-satisfy f =
-    Parser $ \(S bs n) ->
-           case LB.uncons bs of
-             Just (s, bs') | f s -> Right (s, S bs' (n + 1))
-             _                   -> Left (bs, [])
+satisfy p =
+    Parser $ \s@(S sb lb n) ->
+           case SB.uncons sb of
+             Just (c, sb') | p c -> Right (c, S sb' lb (n + 1))
+                           | otherwise -> Left (sb +: lb, [])
+             Nothing -> unParser (nextChunk >> satisfy p) s
 {-# INLINE satisfy #-}
 
 letter :: Parser Char
 sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return [])
 
 -- | Satisfy a literal string.
-byteString :: LB.ByteString -> Parser LB.ByteString
-byteString s = Parser $ \(S bs n) ->
-               let l = LB.length s
+string :: LB.ByteString -> Parser LB.ByteString
+string s = Parser $ \(S sb lb n) ->
+               let bs = sb +: lb
+                   l = LB.length s
                    (h, t) = LB.splitAt l bs
                in if s == h
-                  then Right (s, S t (n + l))
+                  then Right (s, mkState t (n + l))
                   else Left (bs, [])
-{-# INLINE byteString #-}
+{-# INLINE string #-}
 
--- | Satisfy a literal string.
-byteStringCI :: LB.ByteString -> Parser LB.ByteString
-byteStringCI s = Parser $ \(S bs n) ->
-               let l = LB.length s
+-- | Satisfy a literal string, ignoring case.
+stringCI :: LB.ByteString -> Parser LB.ByteString
+stringCI s = Parser $ \(S sb lb n) ->
+               let bs = sb +: lb
+                   l = LB.length s
                    (h, t) = LB.splitAt l bs
                in if ls == LB.map toLower h
-                  then Right (s, S t (n + l))
+                  then Right (s, mkState t (n + l))
                   else Left (bs, [])
     where ls = LB.map toLower s
-{-# INLINE byteStringCI #-}
-
-string :: String -> Parser String
-string s = byteString (LB.pack s) >> return s
-{-# INLINE string #-}
-
-stringCI :: String -> Parser String
-stringCI s = byteStringCI (LB.pack s) >> return s
 {-# INLINE stringCI #-}
 
 -- | Apply the given parser repeatedly, returning every parse result.
 {-# INLINE count #-}
 
 try :: Parser a -> Parser a
-try p = Parser $ \s@(S bs _) ->
+try p = Parser $ \s@(S sb lb _) ->
         case unParser p s of
-          Left (_, msgs) -> Left (bs, msgs)
+          Left (_, msgs) -> Left (sb +: lb, msgs)
           ok -> ok
 
 -- | Detect 'end of file'.
 eof :: Parser ()
-eof = Parser $ \s@(S bs _) -> if LB.null bs
-                              then Right ((), s)
-                              else Left (bs, ["EOF"])
+eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb
+                                 then Right ((), s)
+                                 else Left (sb +: lb, ["EOF"])
 
 takeAll :: Parser LB.ByteString
-takeAll = Parser $ \(S bs n) -> Right (bs, S LB.empty (n + LB.length bs))
+takeAll = Parser $ \(S sb lb n) ->
+          let bs = sb +: lb
+          in Right (bs, mkState LB.empty (n + LB.length bs))
+
+oneChunk :: SB.ByteString -> LB.ByteString
+oneChunk s = LB.Chunk s LB.Empty
+
+length64 :: SB.ByteString -> Int64
+length64 = fromIntegral . SB.length
 
 -- | Consume characters while the predicate is true.
 takeWhile :: (Char -> Bool) -> Parser LB.ByteString
-takeWhile f = Parser $ \(S bs n) ->
-              let (h, bs') = LB.span f bs
-              in Right (h, S bs' (n + LB.length h))
+takeWhile p = Parser $ \s@(S sb lb n) ->
+              let (h, t) = SB.span p sb
+              in if SB.null t
+                 then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s
+                 else Right (oneChunk h, S t lb (n + length64 h))
 {-# INLINE takeWhile #-}
 
 takeTill :: (Char -> Bool) -> Parser LB.ByteString
 {-# INLINE takeTill #-}
 
 takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString
-takeWhile1 f = Parser $ \(S bs n) ->
-              let (h, bs') = LB.span f bs
-              in if LB.null h
-                 then Left (bs, [])
-                 else Right (h, S bs' (n + LB.length h))
+takeWhile1 p = Parser $ \s@(S sb lb n) ->
+               let (h, t) = SB.span p sb
+               in if SB.null t
+                  then case unParser (nextChunk >> takeWhile p) s of
+                         Left err -> Left err
+                         Right (xs, s') ->
+                             let bs = h +: xs
+                             in if LB.null bs
+                               then Left (sb +: lb, [])
+                               else Right (bs, s')
+                  else Right (oneChunk h, S t lb (n + length64 h))
 {-# INLINE takeWhile1 #-}
 
 -- | Skip over characters while the predicate is true.
 skipSpace = takeWhile isSpace >> return ()
 {-# INLINE skipSpace #-}
 
--- | Take zero or more instances of the parser.
-many ::  Parser a -> Parser [a]
-many p = scan id
-    where scan f = do x <- p
-                      scan (f . (x:))
-                 <|> return (f [])
-
--- | Take one or more instances of the parser.
-many1 :: Parser a -> Parser [a]
-many1 p = liftM2 (:) p (many p)
-
 manyTill :: Parser a -> Parser b -> Parser [a]
 manyTill p end = scan
     where scan = (end >> return []) <|> liftM2 (:) p scan
              end <- getConsumed
              return (LB.take (end - start) bs)
 
-maybeP :: Parser a -> Parser (Maybe a)
-maybeP p = (Just <$> p) <|> pure Nothing
-{-# INLINE maybeP #-}
-
 eitherP :: Parser a -> Parser b -> Parser (Either a b)
 eitherP a b = (Left <$> a) <|> (Right <$> b)
 {-# INLINE eitherP #-}
 parseAt :: Parser a -> LB.ByteString -> Int64
         -> (LB.ByteString, Either ParseError a)
 parseAt p bs n = 
-    case unParser p (S bs n) of
+    case unParser p (mkState bs n) of
       Left (bs', msg) -> (bs', Left $ showError msg)
-      Right (a, S bs' _) -> (bs', Right a)
+      Right (a, S sb lb _) -> (sb +: lb, Right a)
     where
       showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n"
       showError msgs = "Parser error, expected one of:\n" ++ unlines msgs
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.