Commits

Bryan O'Sullivan committed 53b9461

More progress on laziness

  • Participants
  • Parent commits 79a087d

Comments (0)

Files changed (7)

File Data/Text.hs

     , zipWith
 
     -- -* Ordered text
-    , -- sort
+    -- , sort
     ) where
 
 import Prelude (Char, Bool(..), Functor(..), Int, Maybe(..), String,
 -- | /O(1)/ Convert a character into a Text.
 -- Subject to array fusion.
 singleton :: Char -> Text
-singleton c = unstream (Stream next (c:[]) 1)
-    where
-      {-# INLINE next #-}
-      next (k:ks) = Yield k ks
-      next []     = Done
+singleton = unstream . S.singleton
 {-# INLINE [1] singleton #-}
 
 -- -----------------------------------------------------------------------------

File Data/Text/Fusion.hs

             n2 = A.unsafeIndex arr (i - 1)
 {-# INLINE [0] reverseStream #-}
 
--- | /O(n)/ Convert a Stream Char into a Text.
+-- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
 unstream :: Stream Char -> Text
 unstream (Stream next0 s0 len)
     | len == 0 = I.empty

File Data/Text/Fusion/Internal.hs

     , Switch(..)
     , Step(..)
     , Stream(..)
+    , singleton
     , empty
     , streamList
     , unstreamList
     !s                          -- current state
     {-# UNPACK #-}!Int          -- length hint
 
+singleton :: Char -> Stream Char
+singleton c = Stream next False 1
+    where next False = Yield c True
+          next True  = Done
+{-# INLINE singleton #-}
+
 -- | The empty stream.
 empty :: Stream a
 empty = Stream next () 0

File Data/Text/Lazy.hs

 {-# OPTIONS_GHC -fno-warn-orphans #-}
+-- |
+-- Module      : Data.Text.Lazy
+-- Copyright   : (c) Bryan O'Sullivan 2009,
+--               (c) Tom Harper 2008-2009,
+--               (c) Duncan Coutts 2008
+--
+-- License     : BSD-style
+-- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
+--               duncan@haskell.org
+-- Stability   : experimental
+-- Portability : GHC
+--
+-- A time and space-efficient implementation of Unicode text using
+-- lists of packed arrays.  This representation is suitable for high
+-- performance use and for streaming large quantities of data.  It
+-- provides a means to manipulate a large body of text without
+-- requiring that the entire content be resident in memory.
+--
+-- Some operations, such as 'concat', 'append', 'reverse' and 'cons',
+-- have better complexity than their "Data.Text" equivalents, due to
+-- optimisations resulting from the list spine structure. And for
+-- other operations lazy 'Text's are usually within a few percent of
+-- strict ones, but with better heap usage. For data larger than
+-- available memory, or if you have tight memory constraints, this
+-- module will be the only option.
+--
+-- This module is intended to be imported @qualified@, to avoid name
+-- clashes with "Prelude" functions.  e.g.
+--
+-- > import qualified Data.Text.Lazy as L
 
 module Data.Text.Lazy
     (
       Text
+    -- * Creation and elimination
     , pack
     , unpack
+    , singleton
+    , empty
+
     -- * Basic interface
+    -- , cons
+    -- , snoc
     , append
+    -- , uncons
+    -- , head
+    -- , last
+    -- , tail
+    -- , init
+    -- , null
+    -- , length
+
+    -- * Transformations
+    -- , map
+    -- , intercalate
+    -- , intersperse
+    -- , transpose
+    -- , reverse
+
+    -- * Folds
+    -- , foldl
+    -- , foldl'
+    -- , foldl1
+    -- , foldl1'
+    -- , foldr
+    -- , foldr1
+
+    -- ** Special folds
+    -- , concat
+    -- , concatMap
+    -- , any
+    -- , all
+    -- , maximum
+    -- , minimum
+
+    -- * Construction
+
+    -- ** Scans
+    -- , scanl
+    -- , scanl1
+    -- , scanr
+    -- , scanr1
+
+    -- ** Accumulating maps
+    -- , mapAccumL
+    -- , mapAccumR
+
+    -- ** Generation and unfolding
+    -- , replicate
+    -- , unfoldr
+    -- , unfoldrN
+
+    -- * Substrings
+
+    -- ** Breaking strings
+    -- , take
+    -- , drop
+    -- , takeWhile
+    -- , dropWhile
+    -- , splitAt
+    -- , span
+    -- , break
+    -- , group
+    -- , groupBy
+    -- , inits
+    -- , tails
+
+    -- ** Breaking into many substrings
+    -- , split
+    -- , splitWith
+    -- , breakSubstring
+
+    -- ** Breaking into lines and words
+    -- , lines
+    -- , lines'
+    -- , words
+    -- , unlines
+    -- , unwords
+
+    -- * Predicates
+    -- , isPrefixOf
+    -- , isSuffixOf
+    -- , isInfixOf
+
+    -- * Searching
+    -- , elem
+    -- , filter
+    -- , find
+    -- , partition
+
+    -- , findSubstring
+
+    -- * Indexing
+    -- , index
+    -- , findIndex
+    -- , findIndices
+    -- , elemIndex
+    -- , elemIndices
+    -- , count
+
+    -- * Zipping and unzipping
+    -- , zipWith
+
+    -- -* Ordered text
+    -- , sort
     ) where
 
 import Data.String (IsString(..))
+import qualified Data.Text as T
 import qualified Data.Text.Fusion as S
-import Data.Text.Fusion.Internal as S
+import qualified Data.Text.Fusion.Internal as S
 import Data.Text.Lazy.Fusion
 import Data.Text.Lazy.Internal
 
 -- | /O(n)/ Convert a 'Text' into a 'String'.
 -- Subject to array fusion.
 unpack :: Text -> String
-unpack = unstreamList . stream
+unpack = S.unstreamList . stream
 {-# INLINE [1] unpack #-}
 
+singleton :: Char -> Text
+singleton c = Chunk (T.singleton c) Empty
+{-# INLINE [1] singleton #-}
+
+{-# RULES
+"TEXT singleton -> fused" [~1] forall c.
+    singleton c = unstream (S.singleton c)
+"TEXT singleton -> unfused" [1] forall c.
+    unstream (S.singleton c) = singleton c
+  #-}
+
 -- | /O(n\/c)/ Append two 'Text's
 append :: Text -> Text -> Text
 append xs ys = foldrChunks Chunk ys xs

File Data/Text/Lazy/Fusion.hs

 -- |
 -- Module      : Data.Text.Lazy.Fusion
--- Copyright   : (c) Tom Harper 2008-2009,
---               (c) Bryan O'Sullivan 2009,
---               (c) Duncan Coutts 2009
+-- Copyright   : (c) Bryan O'Sullivan 2009
 --
 -- License     : BSD-style
 -- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
     (
       stream
     , unstream
+    , unstreamChunks
     ) where
 
 import Data.Text.Fusion.Internal
 
 default(Int)
 
+-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
 stream :: Text -> Stream Char
 stream text = Stream next (text :!: 0) 4
   where
         where (c,d) = iter t i
 {-# INLINE [0] stream #-}
 
-unstream :: Stream Char -> Text
-unstream (Stream next s0 len0)
+-- | /O(n)/ Convert a 'Stream Char' into a 'Text', using the given
+-- chunk size.
+unstreamChunks :: Int -> Stream Char -> Text
+unstreamChunks chunkSize (Stream next s0 len0)
   | len0 == 0 = Empty
   | otherwise = outer s0
   where
     outer s = case next s of
-                Done    -> Empty
-                Skip s' -> outer s'
-                Yield x s' -> chunk t (outer s'')
-                  where (t,s'') = fill x s'
-    fill x s = (I.Text a 0 l,s')
-        where (a,(s',l)) = A.run2 (A.unsafeNew initLen >>= (\arr -> inner arr initLen x s 0))
-    initLen = 8
+                Done       -> Empty
+                Skip s'    -> outer s'
+                Yield x s' -> I.Text arr 0 len `chunk` outer s''
+                  where (arr,(s'',len)) = A.run2 fill
+                        fill = do arr <- A.unsafeNew unknownLength
+                                  inner arr unknownLength x s' 0
+                        unknownLength = 4
     inner marr len x s i
-        | i + 1 >= defaultChunkSize = return (marr, (s,i))
-        | i + 1 >= len = do
-            let newLen = min (len * 2) defaultChunkSize
+        | i + 1 >= chunkSize = return (marr, (s,i))
+        | i + 1 >= len       = do
+            let newLen = min (len * 2) chunkSize
             marr' <- A.unsafeNew newLen
             A.copy marr marr'
             inner marr' newLen x s i
                                 return (marr,(s,i'))
               Skip s'     -> inner marr len x s i
               Yield x' s' -> unsafeWrite marr i x >>= inner marr len x' s' 
+{-# INLINE [0] unstreamChunks #-}
+
+-- | /O(n)/ Convert a 'Stream Char' into a 'Text', using
+-- 'defaultChunkSize'.
+unstream :: Stream Char -> Text
+unstream = unstreamChunks defaultChunkSize
 {-# INLINE [0] unstream #-}
+
 {-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}

File Data/Text/Lazy/Internal.hs

     (
       Text(..)
     , chunk
+    , empty
     , foldrChunks
     , foldlChunks
     -- * Data type invariant and abstraction functions
 chunk t@(T.Text _ _ len) ts | len == 0 = ts
                             | otherwise = Chunk t ts
 
+-- | Smart constructor for 'Empty'.
+empty :: Text
+{-# INLINE [0] empty #-}
+empty = Empty
+
 -- | Consume the chunks of a lazy 'Text' with a natural right fold.
 foldrChunks :: (T.Text -> a -> a) -> a -> Text -> a
 foldrChunks f z = go

File tests/Properties.hs

 prop_TL_stream_unstream  = (SL.unstream . SL.stream) `eq` id
 prop_T_reverse_stream t  = (S.reverse . S.reverseStream) t == t
 prop_T_singleton c       = [c] == (T.unpack . T.singleton) c
+prop_TL_singleton c      = [c] == (TL.unpack . TL.singleton) c
 
 prop_T_ascii t           = E.decodeASCII (E.encodeUtf8 a) == a
     where a            = T.map (\c -> chr (ord c `mod` 128)) t
   ("prop_TL_stream_unstream", mytest prop_TL_stream_unstream),
   ("prop_T_reverse_stream", mytest prop_T_reverse_stream),
   ("prop_T_singleton", mytest prop_T_singleton),
+  ("prop_TL_singleton", mytest prop_TL_singleton),
 
   ("prop_T_ascii", mytest prop_T_ascii),
   ("prop_T_utf8", mytest prop_T_utf8),