Commits

Bryan O'Sullivan committed addcdeb

Back out the repeated-append-to-same-Buffer changes

I'm torn over this. The use-counting changes obviously improve
safety, but they do so for a case that should never arise in practice,
and they come with a *steep* cost in performance (many benchmarks
regress to slower than 0.11).

I am inclined to either try to find a near-zero-cost alternative
that preserves safety, or to document the API as "this is a single-use
continuation, so don't call it more than once".

Comments (0)

Files changed (3)

Data/Attoparsec/ByteString/Buffer.hs

     , buffer
     , unbuffer
     , length
-    , pappend
     , unsafeIndex
     , substring
     , unsafeDrop
     ) where
 
-import Control.Applicative ((<$>))
 import Control.Exception (assert)
 import Data.ByteString.Internal (ByteString(..), memcpy, nullForeignPtr)
 import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO)
-import Data.IORef (IORef, atomicModifyIORef, newIORef)
 import Data.List (foldl1')
 import Data.Monoid (Monoid(..))
 import Data.Word (Word8)
 import Foreign.Storable (peekByteOff)
 import GHC.ForeignPtr (mallocPlainForeignPtrBytes)
 import Prelude hiding (length)
-import System.IO.Unsafe (unsafePerformIO)
 
 data Buffer = Buf {
       _fp  :: {-# UNPACK #-} !(ForeignPtr Word8)
     , _off :: {-# UNPACK #-} !Int
     , _len :: {-# UNPACK #-} !Int
     , _cap :: {-# UNPACK #-} !Int
-    , _gen :: {-# UNPACK #-} !Int
-    , _ref :: {-# UNPACK #-} !(IORef Int)
     }
 
 instance Show Buffer where
 -- copies in the (hopefully) common case of no further input being fed
 -- to us.
 buffer :: ByteString -> Buffer
-buffer (PS fp off len) = inlinePerformIO $
-  Buf fp off len len 0 <$> newIORef 0
+buffer (PS fp off len) = Buf fp off len len
 
 unbuffer :: Buffer -> ByteString
-unbuffer (Buf fp off len _cap _gen _ref) = PS fp off len
+unbuffer (Buf fp off len _cap) = PS fp off len
 
 instance Monoid Buffer where
-    mempty = unsafePerformIO $ Buf nullForeignPtr 0 0 0 0 <$> newIORef 0
-    {-# NOINLINE mempty #-}
+    mempty = Buf nullForeignPtr 0 0 0
 
-    mappend     (Buf _ _ _ 0 _ _) b    = b
-    mappend a   (Buf _ _ _ 0 _ _)      = a
-    mappend buf (Buf fp off len _ _ _) = append buf fp off len
+    mappend (Buf _ _ _ 0) b = b
+    mappend a (Buf _ _ _ 0) = a
+    mappend (Buf fp0 off0 len0 cap0) (Buf fp1 off1 len1 _cap1) =
+      inlinePerformIO . withForeignPtr fp0 $ \ptr0 ->
+        withForeignPtr fp1 $ \ptr1 -> do
+          let newlen = len0 + len1
+          if newlen <= cap0
+            then do
+              memcpy (ptr0 `plusPtr` (off0+len0))
+                     (ptr1 `plusPtr` off1)
+                     len1
+              return (Buf fp0 off0 newlen cap0)
+            else do
+              let newcap = newlen * 2
+              fp <- mallocPlainForeignPtrBytes newcap
+              withForeignPtr fp $ \ptr -> do
+                memcpy ptr (ptr0 `plusPtr` off0) len0
+                memcpy (ptr `plusPtr` len0) (ptr1 `plusPtr` off1) len1
+              return (Buf fp 0 newlen newcap)
 
     mconcat [] = mempty
     mconcat xs = foldl1' mappend xs
 
-pappend :: Buffer -> ByteString -> Buffer
-pappend buf (PS fp off len) = append buf fp off len
-
-append :: Buffer -> ForeignPtr a -> Int -> Int -> Buffer
-append (Buf fp0 off0 len0 cap0 gen ref) fp1 off1 len1 =
-  inlinePerformIO . withForeignPtr fp0 $ \ptr0 ->
-    withForeignPtr fp1 $ \ptr1 -> do
-      gen' <- atomicModifyIORef ref (\a -> let a' = a+1 in (a',a'))
-      let newlen = len0 + len1
-      if gen' == gen+1 && newlen <= cap0
-        then do
-          memcpy (ptr0 `plusPtr` (off0+len0))
-                 (ptr1 `plusPtr` off1)
-                 len1
-          return (Buf fp0 off0 newlen cap0 gen' ref)
-        else do
-          let newcap = newlen * 2
-          fp <- mallocPlainForeignPtrBytes newcap
-          withForeignPtr fp $ \ptr -> do
-            memcpy ptr (ptr0 `plusPtr` off0) len0
-            memcpy (ptr `plusPtr` len0) (ptr1 `plusPtr` off1) len1
-          return (Buf fp 0 newlen newcap gen' ref)
-
 length :: Buffer -> Int
-length (Buf _ _ len _ _ _) = len
+length (Buf _ _ len _) = len
 {-# INLINE length #-}
 
 unsafeIndex :: Buffer -> Int -> Word8
-unsafeIndex (Buf fp off len _ _ _) i = assert (i >= 0 && i < len) .
+unsafeIndex (Buf fp off len _cap) i = assert (i >= 0 && i < len) .
     inlinePerformIO . withForeignPtr fp $ flip peekByteOff (off+i)
 {-# INLINE unsafeIndex #-}
 
 substring :: Int -> Int -> Buffer -> ByteString
-substring s l (Buf fp off len _ _ _) =
+substring s l (Buf fp off len _cap) =
   assert (s >= 0 && s <= len) .
   assert (l >= 0 && l <= len-s) $
   PS fp (off+s) l
 {-# INLINE substring #-}
 
 unsafeDrop :: Int -> Buffer -> ByteString
-unsafeDrop s (Buf fp off len _ _ _) =
+unsafeDrop s (Buf fp off len _cap) =
   assert (s >= 0 && s <= len) $
   PS fp (off+s) (len-s)
 {-# INLINE unsafeDrop #-}

Data/Attoparsec/ByteString/Internal.hs

 prompt t pos _more lose succ = Partial $ \s ->
   if B.null s
   then lose t pos Complete
-  else succ (Buf.pappend t s) pos Incomplete
+  else succ (t <> buffer s) pos Incomplete
 
 -- | Immediately demand more input via a 'Partial' continuation
 -- result.

Data/Attoparsec/Text/Buffer.hs

-{-# LANGUAGE CPP, MagicHash, RankNTypes, RecordWildCards, UnboxedTuples #-}
+{-# LANGUAGE MagicHash, RankNTypes, RecordWildCards, UnboxedTuples #-}
 
 -- |
 -- Module      :  Data.Attoparsec.Text.Buffer
     , dropWord16
     ) where
 
-import Control.Applicative ((<$>))
 import Control.Exception (assert)
-import Data.Attoparsec.Internal.Fhthagn (inlinePerformIO)
-import Data.IORef (IORef, atomicModifyIORef, newIORef)
 import Data.List (foldl1')
 import Data.Monoid (Monoid(..))
 import Data.Text ()
 import GHC.Base (unsafeCoerce#)
 import GHC.ST (ST(..), runST)
 import Prelude hiding (length)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.Text.Array as A
 
-#if __GLASGOW_HASKELL__ >= 702
-import Control.Monad.ST.Unsafe (unsafeIOToST)
-#else
-import Control.Monad.ST (unsafeIOToST)
-#endif
-
 data Buffer = Buf {
       _arr :: {-# UNPACK #-} !A.Array
     , _off :: {-# UNPACK #-} !Int
     , _len :: {-# UNPACK #-} !Int
     , _cap :: {-# UNPACK #-} !Int
-    , _gen :: {-# UNPACK #-} !Int
-    , _ref :: {-# UNPACK #-} !(IORef Int)
     }
 
 instance Show Buffer where
 -- copies in the (hopefully) common case of no further input being fed
 -- to us.
 buffer :: Text -> Buffer
-buffer (Text arr off len) = inlinePerformIO $
-  Buf arr off len len 0 <$> newIORef 0
+buffer (Text arr off len) = Buf arr off len len
 
 unbuffer :: Buffer -> Text
-unbuffer (Buf arr off len _ _ _) = Text arr off len
+unbuffer (Buf arr off len _cap) = Text arr off len
 
 instance Monoid Buffer where
-    mempty = unsafePerformIO $ Buf A.empty 0 0 0 0 <$> newIORef 0
-    {-# NOINLINE mempty #-}
+    mempty = Buf A.empty 0 0 0
 
-    mappend (Buf _ _ _ 0 _ _) b = b
-    mappend a (Buf _ _ _ 0 _ _) = a
-    mappend (Buf arr0 off0 len0 cap0 gen ref)
-            (Buf arr1 off1 len1 _ _ _) = runST $ do
-      gen' <- unsafeIOToST $
-              atomicModifyIORef ref (\a -> let a' = a+1 in (a',a'))
+    mappend (Buf _ _ _ 0) b = b
+    mappend a (Buf _ _ _ 0) = a
+    mappend (Buf arr0 off0 len0 cap0) (Buf arr1 off1 len1 _cap1) = runST $ do
       let newlen = len0 + len1
-      if gen' == gen+1 && newlen <= cap0
+      if newlen <= cap0
         then do
           marr <- unsafeThaw arr0
           A.copyI marr (off0+len0) arr1 off1 (off0+newlen)
           arr2 <- A.unsafeFreeze marr
-          return (Buf arr2 off0 newlen cap0 gen' ref)
+          return (Buf arr2 off0 newlen cap0)
         else do
           let newcap = newlen * 2
           marr <- A.new newcap
           A.copyI marr 0 arr0 off0 len0
           A.copyI marr len0 arr1 off1 newlen
           arr2 <- A.unsafeFreeze marr
-          return (Buf arr2 0 newlen newcap gen' ref)
+          return (Buf arr2 0 newlen newcap)
 
     mconcat [] = mempty
     mconcat xs = foldl1' mappend xs
 
 length :: Buffer -> Int
-length (Buf _ _ len _ _ _) = len
+length (Buf _ _ len _) = len
 {-# INLINE length #-}
 
 substring :: Int -> Int -> Buffer -> Text
-substring s l (Buf arr off len _ _ _) =
+substring s l (Buf arr off len _cap) =
   assert (s >= 0 && s <= len) .
   assert (l >= 0 && l <= len-s) $
   Text arr (off+s) l
 {-# INLINE substring #-}
 
 dropWord16 :: Int -> Buffer -> Text
-dropWord16 s (Buf arr off len _ _ _) =
+dropWord16 s (Buf arr off len _cap) =
   assert (s >= 0 && s <= len) $
   Text arr (off+s) (len-s)
 {-# INLINE dropWord16 #-}
 -- array, returning the current character and the delta to add to give
 -- the next offset to iterate at.
 iter :: Buffer -> Int -> Iter
-iter (Buf arr off _len _ _ _) i
+iter (Buf arr off _len _cap) i
     | m < 0xD800 || m > 0xDBFF = Iter (unsafeChr m) 1
     | otherwise                = Iter (chr2 m n) 2
   where m = A.unsafeIndex arr j
 -- | /O(1)/ Iterate one step through a UTF-16 array, returning the
 -- delta to add to give the next offset to iterate at.
 iter_ :: Buffer -> Int -> Int
-iter_ (Buf arr off _len _ _ _) i | m < 0xD800 || m > 0xDBFF = 1
+iter_ (Buf arr off _len _cap) i | m < 0xD800 || m > 0xDBFF = 1
                                 | otherwise                = 2
   where m = A.unsafeIndex arr (off+i)
 {-# INLINE iter_ #-}