Commits

Bryan O'Sullivan committed 24b450b

Unacceptably slow thread-safe duplicate write detection

The atomic increments slow down the code by horrifying amounts (a
factor of 2 or so for the takeWhile benchmark, for instance).

Comments (0)

Files changed (4)

Data/Attoparsec/ByteString/Buffer.hs

+{-# LANGUAGE ForeignFunctionInterface #-}
 -- |
 -- Module      :  Data.Attoparsec.ByteString.Buffer
 -- Copyright   :  Bryan O'Sullivan 2007-2014
     , unsafeDrop
     ) where
 
+import Control.Applicative ((<$>))
 import Control.Exception (assert)
 import Data.ByteString.Internal (ByteString(..), memcpy, nullForeignPtr)
 import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO)
 import Data.List (foldl1')
 import Data.Monoid (Monoid(..))
 import Data.Word (Word8)
+import Foreign.C.Types (CLong(..))
 import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
-import Foreign.Ptr (castPtr, plusPtr)
-import Foreign.Storable (peek, peekByteOff, poke, sizeOf)
+import Foreign.Ptr (Ptr, castPtr, plusPtr)
+import Foreign.Storable (peekByteOff, poke, sizeOf)
 import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
 import Prelude hiding (length)
 
               newlen = len0 + len1
           gen <- if gen0 == 0
                  then return 0
-                 else peek (castPtr ptr0)
-          if gen == gen0 && newlen <= cap0
+                 else fromIntegral <$> atomic_inc (castPtr ptr0)
+          if newlen <= cap0 && gen == gen0 + 1
             then do
-              let newgen = gen + 1
-              poke (castPtr ptr0) newgen
               memcpy (ptr0 `plusPtr` (off0+len0))
                      (ptr1 `plusPtr` off1)
                      (fromIntegral len1)
-              return (Buf fp0 off0 newlen cap0 newgen)
+              return (Buf fp0 off0 newlen cap0 gen)
             else do
               let newcap = newlen * 2
               fp <- mallocPlainForeignPtrBytes (newcap + genSize)
   assert (s >= 0 && s <= len) $
   PS fp (off+s) (len-s)
 {-# INLINE unsafeDrop #-}
+
+foreign import ccall unsafe "_atto_atomic_inc" atomic_inc
+    :: Ptr CLong -> IO CLong
                  deepseq,
                  scientific >= 0.3.1 && < 0.4,
                  text >= 1.1.1.3
+  c-sources:     cbits/cbits.c
   if impl(ghc < 7.4)
     build-depends:
       bytestring < 0.10.4.0
 test-suite tests
   type:           exitcode-stdio-1.0
   hs-source-dirs: tests .
+  c-sources:      cbits/cbits.c
   main-is:        QC.hs
   other-modules:  QC.Buffer
                   QC.ByteString
 benchmark benchmarks
   type: exitcode-stdio-1.0
   hs-source-dirs: benchmarks, .
+  c-sources:    cbits/cbits.c
   ghc-options: -O2 -Wall -rtsopts
   main-is: Benchmarks.hs
   other-modules:

benchmarks/attoparsec-benchmarks.cabal

     Links
     Numbers
   hs-source-dirs: .. .
+  c-sources: ../cbits/cbits.c
   ghc-options: -O2 -Wall -rtsopts
   build-depends:
     array,
+long _atto_atomic_inc(long *ptr)
+{
+  return __sync_fetch_and_add(ptr, 1);
+}