Commits

Bryan O'Sullivan committed 6f79fce

Eliminate dependency on the array package

  • Participants
  • Parent commits 814cda5

Comments (0)

Files changed (7)

File Data/Text.hs

                 IO, FilePath)
 import Data.Char (isSpace)
 import Control.Monad.ST(ST)
-import Data.Array.Base(unsafeNewArray_,unsafeWrite,unsafeAt)
-import Data.Array.ST(STUArray, runSTUArray)
+import qualified Data.Text.Array as A
 import qualified Data.ByteString as B
 import Data.ByteString(ByteString)
 import qualified Data.List as L
 -- | /O(n)/ Appends one 'Text' to the other by copying both of them
 -- into a new 'Text'.  Subject to array fusion.
 append :: Text -> Text -> Text
-append (Text arr1 off1 len1) (Text arr2 off2 len2) = Text (runSTUArray x) 0 len
+append (Text arr1 off1 len1) (Text arr2 off2 len2) = Text (A.run x) 0 len
     where
       len = len1+len2
       x = do
-        arr <- unsafeNewArray_ (0,len-1) :: ST s (STUArray s Int Word16)
+        arr <- A.unsafeNew len :: ST s (A.MArray s Word16)
         copy arr1 off1 (len1+off1) arr 0
         copy arr2 off2 (len2+off2) arr len1
         return arr
             where
               copy arr i max arr' j
                   | i >= max  = return ()
-                  | otherwise = do unsafeWrite arr' j (arr `unsafeAt` i)
+                  | otherwise = do A.unsafeWrite arr' j (arr `A.unsafeIndex` i)
                                    copy arr (i+1) max arr' (j+1)
 {-# INLINE append #-}
 
     | n < 0xDC00 || n > 0xDFFF = unsafeChr n
     | otherwise                  = U16.chr2 n0 n
     where
-      n  = unsafeAt arr (off+len-1)
-      n0 = unsafeAt arr (off+len-2)
+      n  = A.unsafeIndex arr (off+len-1)
+      n0 = A.unsafeIndex arr (off+len-2)
 {-# INLINE [1] last #-}
 
 {-# RULES
     | n >= 0xD800 && n <= 0xDBFF = Text arr (off+2) (len-2)
     | otherwise                  = Text arr (off+1) (len-1)
     where
-      n = unsafeAt arr off
+      n = A.unsafeIndex arr off
 {-# INLINE [1] tail #-}
 
 
                         | n >= 0xDC00 && n <= 0xDFFF = Text arr off (len-2)
                         | otherwise                  = Text arr off (len-1)
     where
-      n = unsafeAt arr (off+len-1)
+      n = A.unsafeIndex arr (off+len-1)
 {-# INLINE [1] init #-}
 
 {-# RULES
 -- | /O(n)/ Returns the number of characters in a 'Text'.
 -- Subject to array fusion.
 length :: Text -> Int
-length length t = S.length (stream t)
+length t = S.length (stream t)
 {-# INLINE length #-}
 
 -- -----------------------------------------------------------------------------
            | c < 0xD800 || c > 0xDBFF = loop (i+1) (count+1)
            | otherwise                = loop (i+2) (count+1)
            where
-             c = arr `unsafeAt` i
+             c = arr `A.unsafeIndex` i
 {-# INLINE [1] take #-}
 
 {-# RULES
           | c < 0xD800 || c > 0xDBFF = loop (i+1) (count+1) (l-1)
           | otherwise                = loop (i+2) (count+1) (l-2)
           where
-            c = arr `unsafeAt` i
+            c = arr `A.unsafeIndex` i
 {-# INLINE [1] drop #-}
 
 {-# RULES
                           then []
                           else [(Text arr start (n-start))]
             where
-              c = arr `unsafeAt` n
+              c = arr `A.unsafeIndex` n
 {-# INLINE words #-}
 
 errorEmptyList :: String -> a

File Data/Text/Array.hs

+{-# LANGUAGE CPP, ExistentialQuantification, MagicHash, Rank2Types,
+             ScopedTypeVariables, UnboxedTuples #-}
+-- |
+-- Module      : Data.Text.Array
+-- Copyright   : (c) Bryan O'Sullivan 2009
+--
+-- License     : BSD-style
+-- Maintainer  : bos@serpentine.com,
+-- Stability   : experimental
+-- Portability : portable
+--
+-- Packed, unboxed, heap-resident arrays.  Suitable for performance
+-- critical use, both in terms of large data quantities and high
+-- speed.
+--
+-- This module is intended to be imported @qualified@, to avoid name
+-- clashes with "Prelude" functions, e.g.
+--
+-- > import qualified Data.Text.Array as A
+--
+-- The names in this module resemble those in the 'Data.Array' family
+-- of modules, but are shorter due to the assumption of qualifid
+-- naming.
+module Data.Text.Array
+    (
+    -- * Types
+      IArray(..)
+    , Elt(..)
+    , Array
+    , MArray
+
+    -- * Functions
+    , empty
+    , new
+    , unsafeNew
+    , unsafeFreeze
+    , run
+    ) where
+
+#if defined(__GLASGOW_HASKELL__)
+#include "MachDeps.h"
+
+import GHC.Base (ByteArray#, MutableByteArray#, indexWord16Array#,
+                 newByteArray#, readWord16Array#, unsafeCoerce#,
+                 writeWord16Array#, (+#), (*#))
+import GHC.Prim (Int#)
+import GHC.ST (ST(..), runST)
+import GHC.Types (Int(..))
+import GHC.Word (Word16(..))
+
+#elif defined(__HUGS__)
+
+import Hugs.ByteArray (ByteArray, MutableByteArray, readByteArray,
+                       newMutableByteArray, readMutableByteArray,
+                       unsafeFreezeMutableByteArray, writeMutableByteArray)
+import Foreign.Storable (Storable, sizeOf)
+import Hugs.ST (ST(..), runST)
+
+#else
+# error not implemented for this compiler
+#endif
+
+import Data.Typeable
+import Data.Word (Word16)
+import Prelude hiding (length, read)
+
+#include "Typeable.h"
+
+-- | Immutable array type.
+data Array e = Array
+    {-# UNPACK #-} !Int -- length (in units of e, not bytes)
+#if defined(__GLASGOW_HASKELL__)
+    ByteArray#
+#elif defined(__HUGS__)
+    !ByteArray
+#endif
+
+INSTANCE_TYPEABLE1(Array,arrayTc,"Array")
+
+-- | Mutable array type, for use in the ST monad.
+data MArray s e = MArray
+    {-# UNPACK #-} !Int -- length (in units of e, not bytes)
+#if defined(__GLASGOW_HASKELL__)
+    (MutableByteArray# s)
+#elif defined(__HUGS__)
+    !(MutableByteArray s)
+#endif
+
+INSTANCE_TYPEABLE2(MArray,mArrayTc,"MArray")
+
+-- | Operations supported by all arrays.
+class IArray a where
+    -- | Return the length of an array.
+    length :: a -> Int
+
+instance IArray (Array e) where
+    length (Array len _ba) = len
+    {-# INLINE length #-}
+
+instance IArray (MArray s e) where
+    length (MArray len _ba) = len
+    {-# INLINE length #-}
+
+check :: IArray a => String -> a -> Int -> (a -> Int -> b) -> b
+check func ary i f
+    | i >= 0 && i < length ary = f ary i
+    | otherwise = error ("Data.Array.Flat." ++ func ++ ": index out of bounds")
+{-# INLINE check #-}
+
+-- | Operations supported by all elements that can be stored in
+-- arrays.
+class Elt e where
+    -- | Indicate how many bytes would be used for an array of the
+    -- given size.
+    bytesInArray :: Int -> e -> Int
+    -- | Unchecked read of an immutable array.  May return garbage or
+    -- crash on an out-of-bounds access.
+    unsafeIndex :: Array e -> Int -> e
+    -- | Unchecked read of a mutable array.  May return garbage or
+    -- crash on an out-of-bounds access.
+    unsafeRead :: MArray s e -> Int -> ST s e
+    -- | Unchecked write of a mutable array.  May return garbage or
+    -- crash on an out-of-bounds access.
+    unsafeWrite :: MArray s e -> Int -> e -> ST s ()
+
+    -- | Read an immutable array. An invalid index results in a
+    -- runtime error.
+    index :: Array e -> Int -> e
+    index ary i = check "index" ary i unsafeIndex
+    {-# INLINE index #-}
+
+    -- | Read a mutable array. An invalid index results in a runtime
+    -- error.
+    read :: Array e -> Int -> ST s e
+    read ary i = check "read" ary i read
+    {-# INLINE read #-}
+
+    -- | Write a mutable array. An invalid index results in a runtime
+    -- error.
+    write :: Array e -> Int -> ST s e
+    write ary i = check "write" ary i write
+    {-# INLINE write #-}
+
+-- | Freeze a mutable array. Do not mutate the 'MArray' afterwards!
+unsafeFreeze :: MArray s e -> ST s (Array e)
+
+#if defined(__GLASGOW_HASKELL__)
+
+wORD16_SCALE :: Int# -> Int#
+wORD16_SCALE n# = scale# *# n# where I# scale# = SIZEOF_WORD16
+
+-- | Create an uninitialized mutable array.
+unsafeNew :: forall s e. Elt e => Int -> ST s (MArray s e)
+unsafeNew n = ST $ \s1# ->
+   case bytesInArray n (undefined :: e) of
+     (I# len#) -> case newByteArray# len# s1# of
+                    (# s2#, marr# #) -> (# s2#, MArray n marr# #)
+{-# INLINE unsafeNew #-}
+
+unsafeFreeze (MArray len mba#) = ST $ \s# ->
+                                 (# s#, Array len (unsafeCoerce# mba#) #)
+{-# INLINE unsafeFreeze #-}
+
+-- | Create a mutable array, with its elements initialized with the
+-- given value.
+new :: forall s e. Elt e => Int -> e -> ST s (MArray s e)
+
+#elif defined(__HUGS__)
+
+unsafeIndexArray :: Storable e => Array e -> Int -> e
+unsafeIndexArray (Array off _len arr) i = readByteArray arr (off + i)
+
+unsafeReadMArray :: Storable e => MArray s e -> Int -> ST s e
+unsafeReadMArray (MArray _len marr) = readMutableByteArray marr
+
+unsafeWriteMArray :: Storable e => MArray s e -> Int -> e -> ST s ()
+unsafeWriteMArray (MArray _len marr) = writeMutableByteArray marr
+
+-- | Create an uninitialized mutable array.
+unsafeNew :: (Storable e) => Int -> ST s (MArray s e)
+unsafeNew n = new undefined
+  where new :: (Storable e) => e -> ST s (MArray s e)
+        new unused = do
+          marr <- newMutableByteArray (n * sizeOf unused)
+          return (MArray n marr)
+
+unsafeFreeze (MArray len mba) = do
+  ba <- unsafeFreezeMutableByteArray mba
+  return (Array 0 len ba)
+
+-- | Create a mutable array, with its elements initialized with the
+-- given value.
+new :: (Storable e) => Int -> e -> ST s (MArray s e)
+#endif
+
+new len initVal = do
+  marr <- unsafeNew len
+  sequence_ [unsafeWrite marr i initVal | i <- [0..len-1]]
+  return marr
+
+instance Elt Word16 where
+#if defined(__GLASGOW_HASKELL__)
+
+    bytesInArray (I# i#) _ = I# (wORD16_SCALE i#)
+    {-# INLINE bytesInArray #-}
+
+    unsafeIndex (Array _len ba#) (I# i#) =
+        case indexWord16Array# ba# i# of r# -> (W16# r#)
+    {-# INLINE unsafeIndex #-}
+
+    unsafeRead (MArray _len mba#) (I# i#) = ST $ \s# ->
+      case readWord16Array# mba# i# s# of
+        (# s2#, r# #) -> (# s2#, W16# r# #)
+    {-# INLINE unsafeRead #-}
+
+    unsafeWrite (MArray _len marr#) (I# i#) (W16# e#) = ST $ \s1# ->
+      case writeWord16Array# marr# i# e# s1# of
+        s2# -> (# s2#, () #)
+    {-# INLINE unsafeWrite #-}
+
+#elif defined(__HUGS__)
+
+    bytesInArray n w = sizeOf w * n
+    unsafeIndex = unsafeIndexArray
+    unsafeRead = unsafeReadMArray
+    unsafeWrite = unsafeWriteMArray
+
+#endif
+
+-- | An empty immutable array.
+empty :: Elt e => Array e
+empty = runST (unsafeNew 0 >>= unsafeFreeze)
+
+-- | Run an action in the ST monad and return an immutable array of
+-- its result.
+run :: Elt e => (forall s. ST s (MArray s e)) -> Array e
+run k = runST (k >>= unsafeFreeze)

File Data/Text/Fusion.hs

 import Control.Exception(assert)
 import Control.Monad(liftM2)
 import Control.Monad.ST(runST,ST)
-import Data.Array.Base
 import Data.Bits (shiftL, shiftR, (.&.))
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Unsafe as B
 import System.IO.Unsafe(unsafePerformIO)
 import Data.Text.Internal(Text(..),empty)
 import Data.Text.UnsafeChar(unsafeChr,unsafeChr8,unsafeChr32)
+import qualified Data.Text.Array as A
 import qualified Data.Text.Utf8 as U8
 import qualified Data.Text.Utf16 as U16
 import qualified Data.Text.Utf32 as U32
           | n >= 0xD800 && n <= 0xDBFF = Yield (U16.chr2 n n2) (i + 2)
           | otherwise = Yield (unsafeChr n) (i + 1)
           where
-            n  = unsafeAt arr i
-            n2 = unsafeAt arr (i + 1)
+            n  = A.unsafeIndex arr i
+            n2 = A.unsafeIndex arr (i + 1)
 {-# INLINE [0] stream #-}
 
 -- | /O(n)/ Convert a Stream Char into a Text.
 unstream :: Stream Char -> Text
 unstream (Stream next0 s0 len) = Text (fst a) 0 (snd a)
     where
-      a :: ((UArray Int Word16),Int)
-      a = runST ((unsafeNewArray_ (0,len+1) :: ST s (STUArray s Int Word16))
-                 >>= (\arr -> loop arr 0 (len+1) s0))
+      a :: ((A.Array Word16),Int)
+      a = runST ((A.unsafeNew len :: ST s (A.MArray s Word16))
+                 >>= (\arr -> loop arr 0 len s0))
       loop arr !i !top !s
-          | i + 1 > top = do arr' <- unsafeNewArray_ (0,top*2)
+          | i + 1 > top = do arr' <- A.unsafeNew (top*2)
                              case next0 s of
-                               Done -> liftM2 (,) (unsafeFreezeSTUArray arr) (return i)
+                               Done -> liftM2 (,) (A.unsafeFreeze arr) (return i)
                                _    -> copy arr arr' >> loop arr' i (top*2) s
           | otherwise = case next0 s of
-               Done       -> liftM2 (,) (unsafeFreezeSTUArray arr) (return i)
+               Done       -> liftM2 (,) (A.unsafeFreeze arr) (return i)
                Skip s'    -> loop arr i top s'
                Yield x s'
                    | n < 0x10000 -> do
-                        unsafeWrite arr i (fromIntegral n :: Word16)
+                        A.unsafeWrite arr i (fromIntegral n :: Word16)
                         loop arr (i+1) top s'
                    | otherwise   -> do
-                        unsafeWrite arr i       l
-                        unsafeWrite arr (i + 1) r
+                        A.unsafeWrite arr i       l
+                        A.unsafeWrite arr (i + 1) r
                         loop arr (i+2) top s'
                    where
                      n :: Int
 {-# INLINE [0] unstream #-}
 
 
-copy :: STUArray s Int Word16 -> STUArray s Int Word16 -> ST s ()
-copy src dest = (do
-                   (_,top) <- getBounds src
-                   copy_loop 0 top)
+copy :: A.MArray s Word16 -> A.MArray s Word16 -> ST s ()
+copy src dest = copy_loop 0
     where
-      copy_loop i top
-          | i > top    = return ()
-          | otherwise = do v <- unsafeRead src i
-                           unsafeWrite dest i v
-                           copy_loop (i+1) top
+      len = A.length src
+      copy_loop i
+          | i > len   = return ()
+          | otherwise = do A.unsafeRead src i >>= A.unsafeWrite dest i
+                           copy_loop (i+1)
 
 -- | /O(n)/ Determines if two streams are equal.
 eq :: Ord a => Stream a -> Stream a -> Bool

File Data/Text/Internal.hs

     , empty
     ) where
 
-import Data.Array.ST (newArray_,runSTUArray)
-import Data.Array.Unboxed (UArray)
+import qualified Data.Text.Array as A
 import Data.Typeable (Typeable)
 import Data.Word (Word16)
 
 -- | A space efficient, packed, unboxed Unicode text type.
-data Text = Text {-# UNPACK #-} !(UArray Int Word16) -- payload
-                 {-# UNPACK #-} !Int                 -- offset
-                 {-# UNPACK #-} !Int                 -- length
+data Text = Text {-# UNPACK #-} !(A.Array Word16) -- payload
+                 {-# UNPACK #-} !Int              -- offset
+                 {-# UNPACK #-} !Int              -- length
             deriving (Typeable)
 
 -- | /O(1)/ The empty 'Text'.
 empty :: Text
-empty = Text (runSTUArray (newArray_ (0,0))) 0 0
+empty = Text A.empty 0 0
 {-# INLINE [1] empty #-}

File tests/Bench.hs

 import Data.Text.Fusion (Encoding(..))
 
 import qualified Data.List as L
-import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B
+import qualified Data.ByteString as B8
 import Data.ByteString (ByteString)
 import Data.Word
 import qualified System.IO.UTF8 as UTF8
 
-main = do ascii_bs <- B.readFile "ascii.txt"
+main = do ascii_bs <- B.readFile "text/test/ascii.txt"
           let ascii_txt = T.decode ASCII ascii_bs 
           let ascii_str = T.unpack ascii_txt
           force (ascii_txt,ascii_str,ascii_bs)
           printf " # Text\t\tString\tByteString\n"
           run 1 (ascii_txt,ascii_str,ascii_bs) ascii_tests
-          performGC
-          bmp_txt <- T.readFile Utf8 "bmp.txt"
-          let bmp_str = T.unpack bmp_txt
-          force (bmp_txt,bmp_str)
-          printf " # Text\t\tString\t\n"
-          run 1 (bmp_txt, bmp_str, B.empty)     bmp_tests
-          performGC
-          smp_sip_txt <- T.readFile Utf8 "smp_sip.txt"
-          let smp_sip_str = T.unpack smp_sip_txt
-          force (smp_sip_txt, smp_sip_str)
-          printf " # Text\t\tString\t\n"
-          run 1 (smp_sip_txt, smp_sip_str,B.empty) smp_sip_tests
           
           
 ascii_tests = [
                 ("cons",
                  [F (app1 (T.cons '\88')),
                   F (app2 ((:) '\88')    ),
-                  F (app3 (B.cons 88) )]),
+                  F (app3 (B8.cons 88) )]),
                 ("head",
                  [F (app1 T.head), 
                   F (app2 L.head),
                  ("filter",
                   [F $ app1 $ T.filter (/= '\101'),
                    Flist $ app2 $ L.filter (/= '\101'),
-                   F $ app3 $ B.filter (/= 101)]),
+                   F $ app3 $ B8.filter (/= 101)]),
                  ("foldl'",
                   [F (app1 $ T.foldl' (\a w -> a+1::Int) 0),
                    F (app2 $ L.foldl' (\a w -> a+1::Int) 0),
                    F (app3 $ B.take 30000000)]),
                  ("words",
                   [F (app1 $ T.words),
-                   Flist (app2 $ L.words)])
+                   Flist (app2 $ L.words),
+                   F (app3 $ B.words)])
  ]
 
 bmp_tests = [

File tests/FusionBench.hs

-import Prelude hiding (zip,zip3,fst,snd)
+import Prelude hiding (zip,zip3)
 
 import BenchUtils
 import Data.Char
           printf " # Text\t\tString\tByteString\n"
           run 1 (ascii_txt,ascii_str,ascii_bs) ascii_tests
 
+trd (a,b,c) = c
+
 ascii_tests =  [
  ("map/map",
   [F $ T.map pred . T.map succ . fst, 
 library
   exposed-modules:
     Data.Text
+    Data.Text.Array
     Data.Text.UnsafeChar
     Data.Text.Internal
     Data.Text.Fusion
     Data.Text.Utf16
 
   build-depends:
-    base       >= 3   && < 4,
-    array      >= 0.1 && < 0.3,
+    base       < 5,
     bytestring >= 0.9 && < 1.0
   if impl(ghc >= 6.10)
     build-depends:
-      ghc-prim
+      ghc-prim, base >= 4
 
   -- gather extensive profiling data for now
   ghc-prof-options: -auto-all