Commits

Bryan O'Sullivan committed 181af00

Add a tiny, focused "Zepto" module that is faster for some tasks

Although up to 10x slower for general parsing, this module improves
the performance of the aeson module's string unescaper by between
10% and 50%.

Comments (0)

Files changed (2)

Data/Attoparsec/Zepto.hs

+{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples #-}
+
+-- |
+-- Module      :  Data.Attoparsec.Zepto
+-- Copyright   :  Bryan O'Sullivan 2011
+-- License     :  BSD3
+-- 
+-- Maintainer  :  bos@serpentine.com
+-- Stability   :  experimental
+-- Portability :  unknown
+--
+-- A tiny, highly specialized combinator parser for 'B.ByteString'
+-- strings.
+--
+-- While the main Attoparsec module generally performs well, this
+-- module is particularly fast for simple non-recursive loops that
+-- should not normally result in failed parses.
+--
+-- /Warning/: on more complex inputs involving recursion or failure,
+-- parsers based on this module may be as much as /ten times slower/
+-- than regular Attoparsec! You should /only/ use this module when you
+-- have benchmarks that prove that its use speeds your code up.
+module Data.Attoparsec.Zepto
+    (
+      Parser
+    , parse
+    , atEnd
+    , string
+    , take
+    , takeWhile
+    ) where
+
+import Data.Word (Word8)
+import Control.Applicative
+import Control.Monad
+import Data.Monoid
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Unsafe as B
+import Data.ByteString (ByteString)
+import Prelude hiding (take, takeWhile)
+
+newtype S = S {
+      input :: ByteString
+    }
+
+data Result a = Fail String
+              | OK !a
+
+-- | A simple parser.
+--
+-- This monad is strict in its state, and the monadic bind operator
+-- ('>>=') evaluates each result to weak head normal form before
+-- passing it along.
+newtype Parser a = Parser {
+      runParser :: S -> (# Result a, S #)
+    }
+
+instance Functor Parser where
+    fmap f m = Parser $ \s -> case runParser m s of
+                                (# OK a, s' #)     -> (# OK (f a), s' #)
+                                (# Fail err, s' #) -> (# Fail err, s' #)
+    {-# INLINE fmap #-}
+
+instance Monad Parser where
+    return a = Parser $ \s -> (# OK a, s #)
+    {-# INLINE return #-}
+
+    m >>= k   = Parser $ \s -> case runParser m s of
+                                 (# OK a, s' #) -> runParser (k a) s'
+                                 (# Fail err, s' #) -> (# Fail err, s' #)
+    {-# INLINE (>>=) #-}
+
+    fail msg = Parser $ \s -> (# Fail msg, s #)
+
+instance MonadPlus Parser where
+    mzero = fail "mzero"
+    {-# INLINE mzero #-}
+
+    mplus a b = Parser $ \s ->
+                case runParser a s of
+                  (# ok@(OK _), s' #) -> (# ok, s' #)
+                  (# _, _ #) -> case runParser b s of
+                                   (# ok@(OK _), s'' #) -> (# ok, s'' #)
+                                   (# err, s'' #) -> (# err, s'' #)
+    {-# INLINE mplus #-}
+
+instance Applicative Parser where
+    pure   = return
+    (<*>)  = ap
+
+gets :: (S -> a) -> Parser a
+gets f = Parser $ \s -> (# OK (f s), s #)
+{-# INLINE gets #-}
+
+put :: S -> Parser ()
+put s = Parser $ \_ -> (# OK (), s #)
+{-# INLINE put #-}
+
+-- | Run a parser.
+parse :: Parser a -> ByteString -> Either String a
+parse p bs = case runParser p (S bs) of
+               (# OK a, _ #) -> Right a
+               (# Fail err, _ #) -> Left err
+
+instance Monoid (Parser a) where
+    mempty  = fail "mempty"
+    {-# INLINE mempty #-}
+    mappend = mplus
+
+instance Alternative Parser where
+    empty = fail "empty"
+    {-# INLINE empty #-}
+    (<|>) = mplus
+
+-- | Consume input while the predicate returns 'True'.
+takeWhile :: (Word8 -> Bool) -> Parser ByteString
+takeWhile p = do
+  (h,t) <- gets (B.span p . input)
+  put (S t)
+  return h
+{-# INLINE takeWhile #-}
+
+-- | Consume @n@ bytes of input.
+take :: Int -> Parser ByteString
+take !n = do
+  s <- gets input
+  if B.length s >= n
+    then put (S (B.unsafeDrop n s)) >> return (B.unsafeTake n s)
+    else fail "insufficient input"
+{-# INLINE take #-}
+
+-- | Match a string exactly.
+string :: ByteString -> Parser ()
+string s = do
+  i <- gets input
+  if s `B.isPrefixOf` i
+    then put (S (B.unsafeDrop (B.length s) i)) >> return ()
+    else fail "string"
+{-# INLINE string #-}
+
+-- | Indicate whether the end of the input has been reached.
+atEnd :: Parser Bool
+atEnd = do
+  i <- gets input
+  return $! B.null i
+{-# INLINE atEnd #-}
                    Data.Attoparsec.Combinator
                    Data.Attoparsec.FastSet
                    Data.Attoparsec.Lazy
+                   Data.Attoparsec.Zepto
   other-modules:   Data.Attoparsec.Internal
   ghc-options:     -Wall