Bryan O'Sullivan avatar Bryan O'Sullivan committed 0b25b4e Merge

Merge pull request #28 from tibbe/optimize

Several performance improvements

Comments (0)

Files changed (4)

Data/Attoparsec/ByteString/Internal.hs

-{-# LANGUAGE BangPatterns, Rank2Types, OverloadedStrings, RecordWildCards #-}
+{-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings,
+    RecordWildCards #-}
 -- |
 -- Module      :  Data.Attoparsec.ByteString.Internal
 -- Copyright   :  Bryan O'Sullivan 2007-2011
 import qualified Data.ByteString.Lazy as L
 import qualified Data.ByteString.Unsafe as B
 
+#if defined(__GLASGOW_HASKELL__)
+import GHC.Exts (inline)
+#else
+inline :: a -> a
+inline x = x
+#endif
+
 type Parser = T.Parser B.ByteString
 type Result = IResult B.ByteString
 type Input = T.Input B.ByteString
 type Failure r = T.Failure B.ByteString r
 type Success a r = T.Success B.ByteString a r
 
+ensure' :: Int -> Input -> Added -> More -> Failure r -> Success B.ByteString r
+        -> IResult B.ByteString r
+ensure' n0 i0 a0 m0 kf0 ks0 =
+    T.runParser (demandInput >> go n0) i0 a0 m0 kf0 ks0
+  where
+    go n = T.Parser $ \i a m kf ks ->
+        if B.length (unI i) >= n
+        then ks i a m (unI i)
+        else T.runParser (demandInput >> go n) i a m kf ks
+
 -- | If at least @n@ bytes of input are available, return the current
 -- input, otherwise fail.
 ensure :: Int -> Parser B.ByteString
 ensure !n = T.Parser $ \i0 a0 m0 kf ks ->
     if B.length (unI i0) >= n
-    then ks i0 a0 m0 (unI i0)
-    else T.runParser (demandInput >> ensure n) i0 a0 m0 kf ks
+    -- Inline the success continuation to avoid creating a closure in
+    -- the common case of enough data in the buffer:
+    then inline ks i0 a0 m0 (unI i0)
+    -- The uncommon case is kept out-of-line to reduce code size:
+    else ensure' n i0 a0 m0 kf ks
+-- Non-recursive so the bounds check can be inlined:
+{-# INLINE ensure #-}
 
 -- | Ask for input.  If we receive any, pass it to a success
 -- continuation, otherwise to a failure continuation.
 satisfy :: (Word8 -> Bool) -> Parser Word8
 satisfy p = do
   s <- ensure 1
-  let w = B.unsafeHead s
+  let !w = B.unsafeHead s
   if p w
     then put (B.unsafeTail s) >> return w
     else fail "satisfy"
+{-# INLINE satisfy #-}
 
 -- | The parser @skip p@ succeeds for any byte for which the predicate
 -- @p@ returns 'True'.
 satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a
 satisfyWith f p = do
   s <- ensure 1
-  let c = f (B.unsafeHead s)
+  let c = f $! B.unsafeHead s
   if p c
-    then put (B.unsafeTail s) >> return c
+    then let !t = B.unsafeTail s
+         in put t >> return c
     else fail "satisfyWith"
+{-# INLINE satisfyWith #-}
 
 storable :: Storable a => Parser a
 storable = hack undefined
           then go (h:acc)
           else return (h:acc)
       else return (h:acc)
+{-# INLINE takeWhile #-}
 
 takeRest :: Parser [B.ByteString]
 takeRest = go []

Data/Attoparsec/Text/Internal.hs

-{-# LANGUAGE BangPatterns, FlexibleInstances, OverloadedStrings, Rank2Types,
-    RecordWildCards, TypeSynonymInstances #-}
+{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, OverloadedStrings,
+    Rank2Types, RecordWildCards, TypeSynonymInstances #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 -- |
 -- Module      :  Data.Attoparsec.Text.Internal
 import qualified Data.Attoparsec.Internal.Types as T
 import qualified Data.Attoparsec.Text.FastSet as Set
 import qualified Data.Text as T
+import qualified Data.Text.Internal as T
 import qualified Data.Text.Lazy as L
 
+#if defined(__GLASGOW_HASKELL__)
+import GHC.Exts (inline)
+#else
+inline :: a -> a
+inline x = x
+#endif
+
 type Parser = T.Parser Text
 type Result = IResult Text
 type Input = T.Input Text
 instance IsString (Parser Text) where
     fromString = string . T.pack
 
+lengthAtLeast :: T.Text -> Int -> Bool
+lengthAtLeast t@(T.Text _ _ l) n = l >= n * 4 || T.length t >= n
+{-# INLINE lengthAtLeast #-}
+
 -- | If at least @n@ characters of input are available, return the
 -- current input, otherwise fail.
 ensure :: Int -> Parser Text
 ensure !n = T.Parser $ \i0 a0 m0 kf ks ->
-    if T.length (unI i0) >= n
-    then ks i0 a0 m0 (unI i0)
-    else runParser (demandInput >> ensure n) i0 a0 m0 kf ks
+    if lengthAtLeast (unI i0) n
+    then inline ks i0 a0 m0 (unI i0)
+    else runParser (demandInput >> go n) i0 a0 m0 kf ks
+  where
+    go n' = T.Parser $ \i0 a0 m0 kf ks ->
+        if lengthAtLeast (unI i0) n'
+        then ks i0 a0 m0 (unI i0)
+        else runParser (demandInput >> go n') i0 a0 m0 kf ks
+{-# INLINE ensure #-}
 
 -- | Ask for input.  If we receive any, pass it to a success
 -- continuation, otherwise to a failure continuation.
 satisfy :: (Char -> Bool) -> Parser Char
 satisfy p = do
   s <- ensure 1
-  let w = unsafeHead s
+  let !w = unsafeHead s
   if p w
     then put (unsafeTail s) >> return w
     else fail "satisfy"
+{-# INLINE satisfy #-}
 
 -- | The parser @skip p@ succeeds for any character for which the
 -- predicate @p@ returns 'True'.
 satisfyWith :: (Char -> a) -> (a -> Bool) -> Parser a
 satisfyWith f p = do
   s <- ensure 1
-  let c = f (unsafeHead s)
+  let c = f $! unsafeHead s
   if p c
-    then put (unsafeTail s) >> return c
+    then let !t = unsafeTail s
+         in put t >> return c
     else fail "satisfyWith"
+{-# INLINE satisfyWith #-}
 
 -- | Consume @n@ characters of input, but succeed only if the
 -- predicate returns 'True'.
     test-framework-quickcheck2 >= 0.2,
     text
 
+benchmark benchmarks
+  type: exitcode-stdio-1.0
+  hs-source-dirs: benchmarks
+  main-is: Benchmarks.hs
+  build-depends:
+    attoparsec,
+    base,
+    bytestring,
+    criterion >= 0.5,
+    deepseq >= 1.1,
+    parsec >= 3.1.2,
+    text
+
 source-repository head
   type:     git
   location: https://github.com/bos/attoparsec

benchmarks/Benchmarks.hs

 import Control.Applicative
 import Control.DeepSeq (NFData(rnf))
 import Criterion.Main (bench, bgroup, defaultMain, nf, whnf)
+import Data.Bits (unsafeShiftL)
 import Data.ByteString.Internal (ByteString(..))
 import Data.Char
+import Data.Word (Word32)
 import Text.Parsec.Text ()
 import Text.Parsec.Text.Lazy ()
 import qualified Data.Attoparsec.ByteString as AB
      , bench "isAlpha_ascii" $ nf (ABL.parse (AC.takeWhile AC.isAlpha_ascii)) bl
      , bench "isAlpha_iso8859_15" $ nf (ABL.parse (AC.takeWhile AC.isAlpha_iso8859_15)) bl
      ]
+   , bench "word32LE" $ nf (AB.parse word32LE) b
    ]
+
+-- Benchmarks bind and (potential) bounds-check merging.
+word32LE :: AB.Parser Word32
+word32LE = do
+    w1 <- AB.anyWord8
+    w2 <- AB.anyWord8
+    w3 <- AB.anyWord8
+    w4 <- AB.anyWord8
+    return $! (fromIntegral w1 :: Word32) +
+        fromIntegral w2 `unsafeShiftL` 8 +
+        fromIntegral w3 `unsafeShiftL` 16 +
+        fromIntegral w4 `unsafeShiftL` 32
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.