Commits

Bryan O'Sullivan committed 78c55bd

Implement the other API enhancements in the Lazy module.

  • Participants
  • Parent commits 0c65b90

Comments (0)

Files changed (2)

File Data/Text/Lazy.hs

 {-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE BangPatterns #-}
 -- |
 -- Module      : Data.Text.Lazy
 -- Copyright   : (c) Bryan O'Sullivan 2009
     , intersperse
     , transpose
     , reverse
+    , replace
 
     -- ** Case conversion
     -- $case
     , drop
     , takeWhile
     , dropWhile
+    , dropWhileEnd
+    , dropAround
+    , strip
+    , stripStart
+    , stripEnd
     , splitAt
     , span
     , break
     , tails
 
     -- ** Breaking into many substrings
+    -- $split
     , split
+    , splitTimes
+    , splitTimesEnd
     , splitWith
     , chunksOf
     -- , breakSubstring
 
 import Prelude (Char, Bool(..), Int, Maybe(..), String,
                 Eq(..), Ord(..), Read(..), Show(..),
-                (&&), (+), (-), (.), ($), (++),
+                (&&), (||), (+), (-), (.), ($), (++),
                 div, flip, fromIntegral, not, otherwise)
 import qualified Prelude as P
 import Data.Int (Int64)
   where rev a Empty        = a
         rev a (Chunk t ts) = rev (Chunk (T.reverse t) a) ts
 
+-- | /O(m)*O(n)/ Replace every occurrence of one substring with another.
+replace :: Text                 -- ^ Text to search for
+        -> Text                 -- ^ Replacement text
+        -> Text                 -- ^ Input text
+        -> Text
+replace s d = intercalate d . split s
+{-# INLINE replace #-}
+
 -- ----------------------------------------------------------------------------
 -- ** Case conversions (folds)
 
 -- | /O(n)/ 'drop' @n@, applied to a 'Text', returns the suffix of the
 -- 'Text' after the first @n@ characters, or the empty 'Text' if @n@
 -- is greater than the length of the 'Text'. Subject to fusion.
-drop :: Int -> Text -> Text
+drop :: Int64 -> Text -> Text
 drop i t0
     | i <= 0 = t0
     | otherwise = drop' i t0
 "LAZY TEXT dropWhile -> unfused" [1] forall p t.
     unstream (S.dropWhile p (stream t)) = dropWhile p t
   #-}
+-- | /O(n)/ 'dropWhileEnd' @p@ @t@ returns the prefix remaining after
+-- dropping characters that satisfy the predicate @p@ from the end of
+-- @t@.
+-- Examples:
+--
+-- > dropWhileEnd (=='.') "foo..." == "foo"
+dropWhileEnd :: (Char -> Bool) -> Text -> Text
+dropWhileEnd p = go
+  where go Empty = Empty
+        go (Chunk t Empty) = if T.null t'
+                             then Empty
+                             else Chunk t' Empty
+            where t' = T.dropWhileEnd p t
+        go (Chunk t ts) = case go ts of
+                            Empty -> go (Chunk t Empty)
+                            ts' -> Chunk t ts'
+{-# INLINE dropWhileEnd #-}
+
+-- | /O(n)/ 'dropAround' @p@ @t@ returns the substring remaining after
+-- dropping characters that satisfy the predicate @p@ from both the
+-- beginning and end of @t@.  Subject to fusion.
+dropAround :: (Char -> Bool) -> Text -> Text
+dropAround p = dropWhile p . dropWhileEnd p
+{-# INLINE [1] dropAround #-}
+
+-- | /O(n)/ Remove leading white space from a string.  Equivalent to:
+--
+-- > dropWhile isSpace
+stripStart :: Text -> Text
+stripStart = dropWhile isSpace
+{-# INLINE [1] stripStart #-}
+
+-- | /O(n)/ Remove trailing white space from a string.  Equivalent to:
+--
+-- > dropWhileEnd isSpace
+stripEnd :: Text -> Text
+stripEnd = dropWhileEnd isSpace
+{-# INLINE [1] stripEnd #-}
+
+-- | /O(n)/ Remove leading and trailing white space from a string.
+-- Equivalent to:
+--
+-- > dropAround isSpace
+strip :: Text -> Text
+strip = dropAround isSpace
+{-# INLINE [1] strip #-}
 
 -- | /O(n)/ 'splitAt' @n t@ returns a pair whose first element is a
 -- prefix of @t@ of length @n@, and whose second is the remainder of
   | T.length t == 1 = ts : tails ts'
   | otherwise       = ts : tails (Chunk (T.unsafeTail t) ts')
 
--- | /O(n)/ Break a 'Text' into pieces separated by the byte
--- argument, consuming the delimiter. I.e.
+-- $split
 --
--- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
--- > split 'a'  "aXaXaXa"    == ["","X","X","X",""]
--- > split 'x'  "x"          == ["",""]
+-- Splitting functions in this library do not perform character-wise
+-- copies to create substrings; they just construct new 'Text's that
+-- are slices of the original.
+
+-- | /O(m)*O(n)/ Break a 'Text' into pieces separated by the first
+-- 'Text' argument, consuming the delimiter. Examples:
+--
+-- > split "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
+-- > split "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
+-- > split "x"    "x"                == ["",""]
 -- 
 -- and
 --
--- > intercalate [c] . split c == id
--- > split == splitWith . (==)
--- 
--- As for all splitting functions in this library, this function does
--- not copy the substrings, it just constructs new 'Text's that are
--- slices of the original.
-split :: Char -> Text -> [Text]
-split c = splitWith (==c)
-{-# INLINE split #-}
+-- > intercalate s . split s         == id
+-- > split (singleton c)             == splitWith (==c)
+split :: Text                   -- ^ Text to split on
+      -> Text                   -- ^ Input text
+      -> [Text]
+split pat src0
+    | l == 0    = [src0]
+    | l == 1    = splitWith (== (head pat)) src0
+    | otherwise = go src0
+  where
+    l      = length pat
+    go src = search 0 src
+      where
+        search !n !s
+            | null s             = [src]      -- not found
+            | pat `isPrefixOf` s = take n src : go (drop l s)
+            | otherwise          = search (n+1) (tail s)
+{-# INLINE [1] split #-}
+
+{-# RULES
+"LAZY TEXT split/singleton -> splitWith/==" [~1] forall c t.
+    split (singleton c) t = splitWith (==c) t
+  #-}
+
+-- | /O(m)*O(n)/ Break a 'Text' into pieces at most @k@ times,
+-- treating the first 'Text' argument as the delimiter to break on,
+-- and consuming the delimiter.  The last element of the list contains
+-- the remaining text after the number of times to split has been
+-- reached.  A value of zero or less for @k@ causes no splitting to
+-- occur.
+--
+-- Examples:
+--
+-- > splitTimes 0   "//"  "a//b//c"   == ["a//b//c"]
+-- > splitTimes 2   ":"   "a:b:c:d:e" == ["a","b","c:d:e"]
+-- > splitTimes 100 "???" "a????b"    == ["a","?b"]
+--
+-- and
+--
+-- > intercalate s . splitTimes k s   == id
+splitTimes :: Int64             -- ^ Maximum number of times to split
+           -> Text              -- ^ Text to split on
+           -> Text              -- ^ Input text
+           -> [Text]
+splitTimes k pat src0
+    | k <= 0 || l == 0 = [src0]
+    | otherwise        = go k src0
+  where
+    l         = length pat
+    go !i src = search 0 src
+      where
+        search !n !s
+            | i == 0 || null s   = [src]      -- not found or limit reached
+            | pat `isPrefixOf` s = take n src : go (i-1) (drop l s)
+            | otherwise          = search (n+1) (tail s)
+{-# INLINE splitTimes #-}
+
+-- | /O(m)*O(n)/ Break a 'Text' into pieces at most @k@ times, like
+-- 'splitTimes', but start from the end of the input and work towards
+-- the start.
+--
+-- Examples:
+--
+-- > splitTimes 2    "::" "a::b::c::d::e" == ["a","b","c::d::e"]
+-- > splitTimesEnd 2 "::" "a::b::c::d::e" == ["a::b::c","d","e"]
+splitTimesEnd :: Int64             -- ^ Maximum number of times to split
+              -> Text              -- ^ Text to split on
+              -> Text              -- ^ Input text
+              -> [Text]
+splitTimesEnd k pat src =
+    L.reverse . L.map reverse $ splitTimes k (reverse pat) (reverse src)
+{-# INLINE splitTimesEnd #-}
 
 -- | /O(n)/ Splits a 'Text' into components delimited by separators,
 -- where the predicate returns True for a separator element.  The

File tests/Properties.hs

 t_reverse_short n = L.reverse `eqP` (unpackS . S.reverse . shorten n . S.stream)
 
 t_replace s d     = (L.intercalate d . split s) `eqP` (unpackS . T.replace (T.pack s) (T.pack d))
+tl_replace s d     = (L.intercalate d . split s) `eqP` (unpackS . TL.replace (TL.pack s) (TL.pack d))
 
 split :: (Eq a) => [a] -> [a] -> [[a]]
 split pat src0
 s_drop n          = L.drop n      `eqP` (unpackS . S.drop n)
 sf_drop p n       = (L.drop n . L.filter p) `eqP` (unpackS . S.drop n . S.filter p)
 t_drop n          = L.drop n      `eqP` (unpackS . T.drop n)
-tl_drop n         = L.drop n      `eqP` (unpackS . TL.drop n)
+tl_drop n         = L.drop n      `eqP` (unpackS . TL.drop (fromIntegral n))
 s_takeWhile p     = L.takeWhile p `eqP` (unpackS . S.takeWhile p)
 sf_takeWhile q p  = (L.takeWhile p . L.filter q) `eqP` (unpackS . S.takeWhile p . S.filter q)
 t_takeWhile p     = L.takeWhile p `eqP` (unpackS . T.takeWhile p)
 t_dropWhile p     = L.dropWhile p `eqP` (unpackS . T.dropWhile p)
 -- Lazy 'Text' dropWhile must agree with the list model (exercise TL, not the stream version).
 tl_dropWhile p    = L.dropWhile p `eqP` (unpackS . TL.dropWhile p)
 t_dropWhileEnd p  = (L.reverse . L.dropWhile p . L.reverse) `eqP` (unpackS . T.dropWhileEnd p)
+tl_dropWhileEnd p = (L.reverse . L.dropWhile p . L.reverse) `eqP` (unpackS . TL.dropWhileEnd p)
 t_dropAround p    = (L.dropWhile p . L.reverse . L.dropWhile p . L.reverse) `eqP` (unpackS . T.dropAround p)
+tl_dropAround p   = (L.dropWhile p . L.reverse . L.dropWhile p . L.reverse) `eqP` (unpackS . TL.dropAround p)
 t_stripStart      = T.dropWhile isSpace `eq` T.stripStart
+tl_stripStart     = TL.dropWhile isSpace `eq` TL.stripStart
 t_stripEnd        = T.dropWhileEnd isSpace `eq` T.stripEnd
+tl_stripEnd       = TL.dropWhileEnd isSpace `eq` TL.stripEnd
 t_strip           = T.dropAround isSpace `eq` T.strip
+tl_strip          = TL.dropAround isSpace `eq` TL.strip
 t_splitAt n       = L.splitAt n   `eqP` (unpack2 . T.splitAt n)
 tl_splitAt n      = L.splitAt n   `eqP` (unpack2 . TL.splitAt (fromIntegral n))
 t_span p          = L.span p      `eqP` (unpack2 . T.span p)
                                 (a,b)  -> a ++ [T.intercalate t b]
 t_splitTimesEnd_i k t = id `eq` (T.intercalate t . T.splitTimesEnd k t)
 t_splitTimesEnd_split t = T.splitTimesEnd maxBound t `eq` T.split t
-tl_split_i c      = id `eq` (TL.intercalate (TL.singleton c) . TL.split c)
+tl_split_i t      = id `eq` (TL.intercalate t . TL.split t)
 
 t_splitWith p     = splitWith p `eqP` (map unpackS . T.splitWith p)
 t_splitWith_count c = (L.length . T.splitWith (==c)) `eq` ((1+) . T.count c)
     testProperty "tl_reverse" tl_reverse,
     testProperty "t_reverse_short" t_reverse_short,
     testProperty "t_replace" t_replace,
+    testProperty "tl_replace" tl_replace,
 
     testGroup "case conversion" [
       testProperty "s_toCaseFold_length" s_toCaseFold_length,
       testProperty "t_dropWhile" t_dropWhile,
       testProperty "tl_dropWhile" tl_dropWhile,
       testProperty "t_dropWhileEnd" t_dropWhileEnd,
+      testProperty "tl_dropWhileEnd" tl_dropWhileEnd,
       testProperty "t_dropAround" t_dropAround,
+      testProperty "tl_dropAround" tl_dropAround,
       testProperty "t_stripStart" t_stripStart,
+      testProperty "tl_stripStart" tl_stripStart,
       testProperty "t_stripEnd" t_stripEnd,
+      testProperty "tl_stripEnd" tl_stripEnd,
       testProperty "t_strip" t_strip,
+      testProperty "tl_strip" tl_strip,
       testProperty "t_splitAt" t_splitAt,
       testProperty "tl_splitAt" tl_splitAt,
       testProperty "t_span" t_span,