Bryan O'Sullivan avatar Bryan O'Sullivan committed bf66e8e Merge

Merge pull request #35 from tibbe/scan

Optimize scan for short strings

Comments (0)

Files changed (2)

Data/Attoparsec/ByteString/Internal.hs

 {-# LANGUAGE BangPatterns, CPP, Rank2Types, OverloadedStrings,
-    RecordWildCards #-}
+    RecordWildCards, MagicHash, UnboxedTuples #-}
 -- |
 -- Module      :  Data.Attoparsec.ByteString.Internal
 -- Copyright   :  Bryan O'Sullivan 2007-2011
 import Foreign.Ptr (castPtr, minusPtr, plusPtr)
 import Foreign.Storable (Storable(peek, sizeOf))
 import Prelude hiding (getChar, take, takeWhile)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.Attoparsec.Internal.Types as T
 import qualified Data.ByteString as B8
 import qualified Data.ByteString.Char8 as B
 import qualified Data.ByteString.Unsafe as B
 
 #if defined(__GLASGOW_HASKELL__)
+import GHC.Base (realWorld#)
 import GHC.Exts (inline)
+import GHC.IO (IO(IO))
 #else
+import System.IO.Unsafe (unsafePerformIO)
+
 inline :: a -> a
 inline x = x
 #endif
   chunks <- go [] s0
   case chunks of
     [x] -> return x
-    xs  -> return . B.concat . reverse $ xs
+    xs  -> return $! B.concat $ reverse xs
  where
   go acc s1 = do
     let scanner (B.PS fp off len) =
                 done !i !s = return (T i s)
             inner start s1
     bs <- get
-    let T i s' = unsafePerformIO $ scanner bs
-        h = B.unsafeTake i bs
-        t = B.unsafeDrop i bs
+    let T i s' = inlinePerformIO $ scanner bs
+        !h = B.unsafeTake i bs
+        !t = B.unsafeDrop i bs
     put t
     if B.null t
       then do
                   Done _ a     -> Right a
                   _            -> error "parseOnly: impossible error!"
 {-# INLINE parseOnly #-}
+
+-- | Just like unsafePerformIO, but we inline it. Big performance gains as
+-- it exposes lots of things to further inlining. /Very unsafe/. In
+-- particular, you should do no memory allocation inside an
+-- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
+inlinePerformIO :: IO a -> a
+#if defined(__GLASGOW_HASKELL__)
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+#else
+inlinePerformIO = unsafePerformIO
+#endif
+{-# INLINE inlinePerformIO #-}

benchmarks/Benchmarks.hs

 import qualified Data.ByteString.Lazy as BL
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
+import Data.Word (Word8)
 import qualified Text.Parsec as P
 
 instance NFData ByteString where
      , bench "isAlpha_iso8859_15" $ nf (ABL.parse (AC.takeWhile AC.isAlpha_iso8859_15)) bl
      ]
    , bench "word32LE" $ nf (AB.parse word32LE) b
+   , bgroup "scan" [
+       bench "short" $ nf (AB.parse quotedString) (BC.pack "abcdefghijk\"")
+     , bench "long" $ nf (AB.parse quotedString) b
+     ]
    ]
 
 -- Benchmarks bind and (potential) bounds-check merging.
     return $! (fromIntegral w1 :: Word32) +
         fromIntegral w2 `unsafeShiftL` 8 +
         fromIntegral w3 `unsafeShiftL` 16 +
-        fromIntegral w4 `unsafeShiftL` 32
+        fromIntegral w4 `unsafeShiftL` 32
+
+doubleQuote, backslash :: Word8
+doubleQuote = 34
+backslash = 92
+{-# INLINE backslash #-}
+{-# INLINE doubleQuote #-}
+
+-- | Parse a string without a leading quote.
+quotedString :: AB.Parser B.ByteString
+quotedString = AB.scan False $ \s c -> if s then Just False
+                                       else if c == doubleQuote
+                                            then Nothing
+                                            else Just (c == backslash)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.