Bryan O'Sullivan avatar Bryan O'Sullivan committed 16ecbe9

Drop dead files.

Comments (0)

Files changed (2)

src/Text/ParserCombinators/ByteStringParser.hs

------------------------------------------------------------------------------
--- |
--- Module      :  Text.ParserCombinators.ByteStringParser
--- Copyright   :  Daan Leijen 1999-2001, Jeremy Shaw 2006, Bryan O'Sullivan 2007-2008
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Simple, efficient parser combinators for lazy 'LB.ByteString'
--- strings, loosely based on 'Text.ParserCombinators.Parsec'.
--- 
------------------------------------------------------------------------------
-module Text.ParserCombinators.ByteStringParser
-    (
-    -- * Parser
-      ParseError
-    , Parser
-
-    -- * Running parsers
-    , parse
-    , parseAt
-    , parseTest
-
-    -- * Combinators
-    , (<?>)
-
-    -- * Things vaguely like those in @Parsec.Combinator@ (and @Parsec.Prim@)
-    , try
-    , manyTill
-    , eof
-    , skipMany
-    , skipMany1
-    , count
-    , lookAhead
-    , peek
-    , sepBy
-    , sepBy1
-
-    -- * Things like in @Parsec.Char@
-    , satisfy
-    , letter
-    , digit
-    , anyChar
-    , space
-    , char
-    , notChar
-    , string
-    , stringCI
-
-    -- * Parser converters.
-    , eitherP
-
-    -- * Miscellaneous functions.
-    , getInput
-    , getConsumed
-    , takeWhile
-    , takeWhile1
-    , takeTill
-    , takeAll
-    , skipWhile
-    , skipSpace
-    , notEmpty
-    , match
-    , inClass
-    , notInClass
-    ) where
-
-import Control.Applicative (Alternative(..), Applicative(..), (<$>), (<*), (*>))
-import Control.Monad (MonadPlus(..), ap, liftM2)
-import Control.Monad.Fix (MonadFix(..))
-import qualified Data.ByteString.Char8 as SB
-import qualified Data.ByteString.Lazy.Char8 as LB
-import qualified Data.ByteString.Lazy.Internal as LB
-import Data.Char (isDigit, isLetter, isSpace, toLower)
-import Data.Int (Int64)
-import Text.ParserCombinators.ByteStringParser.FastSet (FastSet, member, set)
-import Prelude hiding (takeWhile)
-
-type ParseError = String
-
-data S = S {-# UNPACK #-} !SB.ByteString
-           LB.ByteString
-           {-# UNPACK #-} !Int64
-
-newtype Parser a = Parser {
-      unParser :: S -> Either (LB.ByteString, [String]) (a, S)
-    }
-
-instance Functor Parser where
-    fmap f p =
-        Parser $ \s ->
-            case unParser p s of
-              Right (a, s') -> Right (f a, s')
-              Left err -> Left err
-
-instance Monad Parser where
-    return a = Parser $ \s -> Right (a, s)
-    m >>= f = Parser $ \s ->
-              case unParser m s of
-                Right (a, s') -> unParser (f a) s'
-                Left (s', msgs) -> Left (s', msgs)
-    fail err = Parser $ \(S sb lb _) -> Left (sb +: lb, [err])
-
-instance MonadFix Parser where
-    mfix f = Parser $ \s ->
-             let r = case r of
-                       Right (a, _) -> unParser (f a) s
-                       err -> err
-             in r
-
-zero :: Parser a
-zero = Parser $ \(S sb lb _) -> Left (sb +: lb, [])
-{-# INLINE zero #-}
-
-plus :: Parser a -> Parser a -> Parser a
-plus p1 p2 =
-    Parser $ \s@(S sb lb _) ->
-        case unParser p1 s of
-          Left (_, msgs1) -> 
-              case unParser p2 s of
-                Left (_, msgs2) -> Left (sb +: lb, (msgs1 ++ msgs2))
-                ok -> ok
-          ok -> ok
-{-# INLINE plus #-}
-
-instance MonadPlus Parser where
-    mzero = zero
-    mplus = plus
-
-instance Applicative Parser where
-    pure = return
-    (<*>) = ap
-
-instance Alternative Parser where
-    empty = zero
-    (<|>) = plus
-
-mkState :: LB.ByteString -> Int64 -> S
-mkState s = case s of
-              LB.Empty -> S SB.empty s
-              LB.Chunk x xs -> S x xs
-
--- | Turn our chunked representation back into a normal lazy
--- ByteString.
-(+:) :: SB.ByteString -> LB.ByteString -> LB.ByteString
-sb +: lb | SB.null sb = lb
-         | otherwise = LB.Chunk sb lb
-{-# INLINE (+:) #-}
-
-infix 0 <?>
-
--- | Name the parser.
-(<?>) :: Parser a -> String -> Parser a
-p <?> msg =
-    Parser $ \s@(S sb lb _) ->
-        case unParser p s of
-          (Left _) -> Left (sb +: lb, [msg])
-          ok -> ok
-{-# INLINE (<?>) #-}
-
-nextChunk :: Parser ()
-nextChunk = Parser $ \(S _ lb n) ->
-            case lb of
-              LB.Chunk sb' lb' -> Right ((), S sb' lb' n)
-              LB.Empty -> Left (lb, [])
-
--- | Get remaining input.
-getInput :: Parser LB.ByteString
-getInput = Parser $ \s@(S sb lb _) -> Right (sb +: lb, s)
-
--- | Get number of bytes consumed so far.
-getConsumed :: Parser Int64
-getConsumed = Parser $ \s@(S _ _ n) -> Right (n, s)
-
--- | Character parser.
-satisfy :: (Char -> Bool) -> Parser Char
-satisfy p =
-    Parser $ \s@(S sb lb n) ->
-           case SB.uncons sb of
-             Just (c, sb') | p c -> Right (c, S sb' lb (n + 1))
-                           | otherwise -> Left (sb +: lb, [])
-             Nothing -> unParser (nextChunk >> satisfy p) s
-{-# INLINE satisfy #-}
-
-letter :: Parser Char
-letter = satisfy isLetter <?> "letter"
-{-# INLINE letter #-}
-
-digit :: Parser Char
-digit = satisfy isDigit <?> "digit"
-{-# INLINE digit #-}
-
-anyChar :: Parser Char
-anyChar = satisfy $ const True
-{-# INLINE anyChar #-}
-
-space :: Parser Char
-space = satisfy isSpace <?> "space"
-{-# INLINE space #-}
-
--- | Satisfy a specific character.
-char :: Char -> Parser Char
-char c = satisfy (== c) <?> [c]
-{-# INLINE char #-}
-
--- | Satisfy a specific character.
-notChar :: Char -> Parser Char
-notChar c = satisfy (/= c) <?> "not " ++ [c]
-{-# INLINE notChar #-}
-
-charClass :: String -> FastSet
-charClass = set . SB.pack . go
-    where go (a:'-':b:xs) = [a..b] ++ go xs
-          go (x:xs) = x : go xs
-          go _ = ""
-
-inClass :: String -> Char -> Bool
-inClass s = (`member` myset)
-    where myset = charClass s
-{-# INLINE inClass #-}
-
-notInClass :: String -> Char -> Bool
-notInClass s = not . inClass s
-{-# INLINE notInClass #-}
-
-sepBy :: Parser a -> Parser s -> Parser [a]
-sepBy p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return []) <|> return []
-
-sepBy1 :: Parser a -> Parser s -> Parser [a]
-sepBy1 p s = liftM2 (:) p ((s >> sepBy1 p s) <|> return [])
-
--- | Satisfy a literal string.
-string :: LB.ByteString -> Parser LB.ByteString
-string s = Parser $ \(S sb lb n) ->
-           let bs = sb +: lb
-               l = LB.length s
-               (h, t) = LB.splitAt l bs
-           in if s == h
-              then Right (s, mkState t (n + l))
-              else Left (bs, [])
-{-# INLINE string #-}
-
--- | Satisfy a literal string, ignoring case.
-stringCI :: LB.ByteString -> Parser LB.ByteString
-stringCI s = Parser $ \(S sb lb n) ->
-             let bs = sb +: lb
-                 l = LB.length s
-                 (h, t) = LB.splitAt l bs
-             in if ls == LB.map toLower h
-                then Right (s, mkState t (n + l))
-                else Left (bs, [])
-    where ls = LB.map toLower s
-{-# INLINE stringCI #-}
-
--- | Apply the given parser repeatedly, returning every parse result.
-count :: Int -> Parser a -> Parser [a]
-count n p = sequence (replicate n p)
-{-# INLINE count #-}
-
-try :: Parser a -> Parser a
-try p = Parser $ \s@(S sb lb _) ->
-        case unParser p s of
-          Left (_, msgs) -> Left (sb +: lb, msgs)
-          ok -> ok
-
--- | Detect 'end of file'.
-eof :: Parser ()
-eof = Parser $ \s@(S sb lb _) -> if SB.null sb && LB.null lb
-                                 then Right ((), s)
-                                 else Left (sb +: lb, ["EOF"])
-
-takeAll :: Parser LB.ByteString
-takeAll = Parser $ \(S sb lb n) ->
-          let bs = sb +: lb
-          in Right (bs, mkState LB.empty (n + LB.length bs))
-
-oneChunk :: SB.ByteString -> LB.ByteString
-oneChunk s = LB.Chunk s LB.Empty
-
-length64 :: SB.ByteString -> Int64
-length64 = fromIntegral . SB.length
-
--- | Consume characters while the predicate is true.
-takeWhile :: (Char -> Bool) -> Parser LB.ByteString
-takeWhile p = Parser $ \s@(S sb lb n) ->
-              let (h, t) = SB.span p sb
-              in if SB.null t
-                 then unParser ((h +:) <$> (nextChunk *> takeWhile p)) s
-                 else Right (oneChunk h, S t lb (n + length64 h))
-{-# INLINE takeWhile #-}
-
-takeTill :: (Char -> Bool) -> Parser LB.ByteString
-takeTill p = takeWhile (not . p) <* satisfy p
-{-# INLINE takeTill #-}
-
-takeWhile1 :: (Char -> Bool) -> Parser LB.ByteString
-takeWhile1 p = Parser $ \s@(S sb lb n) ->
-               let (h, t) = SB.span p sb
-               in if SB.null t
-                  then case unParser (nextChunk *> takeWhile p) s of
-                         Left err -> Left err
-                         Right (xs, s') ->
-                             let bs = h +: xs
-                             in if LB.null bs
-                               then Left (sb +: lb, [])
-                               else Right (bs, s')
-                  else Right (oneChunk h, S t lb (n + length64 h))
-{-# INLINE takeWhile1 #-}
-
--- | Skip over characters while the predicate is true.
-skipWhile :: (Char -> Bool) -> Parser ()
-skipWhile p = takeWhile p >> return ()
-{-# INLINE skipWhile #-}
-
--- | Skip over white space.
-skipSpace :: Parser ()
-skipSpace = takeWhile isSpace >> return ()
-{-# INLINE skipSpace #-}
-
-manyTill :: Parser a -> Parser b -> Parser [a]
-manyTill p end = scan
-    where scan = (end >> return []) <|> liftM2 (:) p scan
-
--- |'skipMany' - skip zero or many instances of the parser
-skipMany :: Parser a -> Parser ()
-skipMany p = scan
-    where scan = (p >> scan) <|> return ()
-
--- |'skipMany1' - skip one or many instances of the parser       
-skipMany1 :: Parser  a -> Parser ()
-skipMany1 p = p >> skipMany p
-
--- | Test that a parser returned a non-null ByteString.
-notEmpty :: Parser LB.ByteString -> Parser LB.ByteString 
-notEmpty p = Parser $ \s ->
-             case unParser p s of
-               o@(Right (a, _)) ->
-                   if LB.null a
-                   then Left (a, ["notEmpty"])
-                   else o
-               x -> x
-
--- | Parse some input with the given parser and return that input
--- without copying it.
-match :: Parser a -> Parser LB.ByteString
-match p = do bs <- getInput
-             start <- getConsumed
-             p
-             end <- getConsumed
-             return (LB.take (end - start) bs)
-
-eitherP :: Parser a -> Parser b -> Parser (Either a b)
-eitherP a b = (Left <$> a) <|> (Right <$> b)
-{-# INLINE eitherP #-}
-
-peek :: Parser a -> Parser (Maybe a)
-peek p = Parser $ \s ->
-         case unParser p s of
-           Right (m, _) -> Right (Just m, s)
-           _ -> Right (Nothing, s)
-
-lookAhead :: Parser a -> Parser a
-lookAhead p = Parser $ \s ->
-         case unParser p s of
-           Right (m, _) -> Right (m, s)
-           Left (e, bs) -> Left (e, bs)
-
-parseAt :: Parser a -> LB.ByteString -> Int64
-        -> (LB.ByteString, Either ParseError (a, Int64))
-parseAt p bs n = 
-    case unParser p (mkState bs n) of
-      Left (bs', msg) -> (bs', Left $ showError msg)
-      Right (a, S sb lb n') -> (sb +: lb, Right (a, n'))
-    where
-      showError [msg] = "Parser error, expected:\n" ++ msg ++ "\n"
-      showError msgs = "Parser error, expected one of:\n" ++ unlines msgs
-
--- | Run a parser.
-parse :: Parser a -> LB.ByteString
-      -> (LB.ByteString, Either ParseError a)
-parse p bs = case parseAt p bs 0 of
-               (bs', Right (a, _)) -> (bs', Right a)
-               (bs', Left err) -> (bs', Left err)
-
-parseTest :: (Show a) => Parser a -> LB.ByteString -> IO ()
-parseTest p s =
-    case parse p s of
-      (st, Left msg) -> putStrLn $ msg ++ "\nGot:\n" ++ show st
-      (_, Right r) -> print r

src/Text/ParserCombinators/ByteStringParser/FastSet.hs

-{-# LANGUAGE BangPatterns #-}
-
------------------------------------------------------------------------------
--- |
--- Module      :  Text.ParserCombinators.ByteStringParser.FastSet
--- Copyright   :  Bryan O'Sullivan 2008
--- License     :  BSD3
--- 
--- Maintainer  :  bos@serpentine.com
--- Stability   :  experimental
--- Portability :  unknown
---
--- Fast 8-bit character set membership.
--- 
------------------------------------------------------------------------------
-module Text.ParserCombinators.ByteStringParser.FastSet
-    (
-    -- * Data type
-      FastSet
-    -- * Construction
-    , set
-    -- * Lookup
-    , member
-    , member8
-    -- * Debugging
-    , fromSet
-    ) where
-
-import qualified Data.ByteString as B
--- import Data.ByteString.Char8 (pack)
-import qualified Data.ByteString.Internal as I
-import qualified Data.ByteString.Unsafe as U
-import Data.Word (Word8)
-import Foreign.Storable (peekByteOff, pokeByteOff)
-
-data FastSet = Sorted { fromSet :: {-# UNPACK #-} !B.ByteString }
-             | Table  { fromSet :: {-# UNPACK #-} !B.ByteString }
-    deriving (Eq, Ord)
-
-instance Show FastSet where
-    show (Sorted s) = "FastSet " ++ show s
-    show (Table t)  = "FastSet " ++ fromTable t
-
--- | The lower bound on the size of a lookup table.  We choose this to
--- balance table density against performance.
-tableCutoff :: Int
-tableCutoff = 32
-
--- | Create a character set.
-set :: B.ByteString -> FastSet
-set s | B.length s < tableCutoff = Sorted . B.sort $ s
-      | otherwise                = Table . mkTable $ s
-
--- | Check the table for membership.
-member :: Char -> FastSet -> Bool
-member c = member8 (I.c2w c)
-
--- | Check the table for membership.
-member8 :: Word8 -> FastSet -> Bool
-member8 w (Table t)  = U.unsafeIndex t (fromIntegral w) == entry
-member8 w (Sorted s) = search 0 (B.length s - 1)
-    where search !lo !hi
-              | hi < lo = False
-              | otherwise =
-                  let mid = (lo + hi) `div` 2
-                  in case compare w (U.unsafeIndex s mid) of
-                       GT -> search lo (mid - 1)
-                       LT -> search (mid + 1) hi
-                       _ -> True
-
--- | The value in a table that indicates that a character is not
--- present.  We avoid NUL to make the table representation printable.
-noEntry :: Word8
-noEntry = 0x5f
-
--- | The value in a table that indicates that a character is present.
--- We use a printable character for readability.
-entry :: Word8
-entry = 0x21
-
-mkTable :: B.ByteString -> B.ByteString
-mkTable s = I.unsafeCreate 256 $ \t -> do
-            I.memset t noEntry 256
-            U.unsafeUseAsCStringLen s $ \(p, l) ->
-              let loop n | n == l = return ()
-                         | otherwise = do
-                    c <- peekByteOff p n :: IO Word8
-                    pokeByteOff t (fromIntegral c) entry
-                    loop (n + 1)
-              in loop 0
-
--- | Turn the table representation into a string, for debugging.
-fromTable :: B.ByteString -> String
-fromTable = snd . B.foldr go (0xff, [])
-    where go c (!n, cs) | c == noEntry = (n - 1, cs)
-                        | otherwise    = (n - 1, I.w2c n:cs)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.