Commits

Bryan O'Sullivan committed 88752f3

Rename split to bytestring-like name

Comments (0)

Files changed (4)

 
     -- ** Breaking into many substrings
     -- $split
+    , splitOn
     , split
-    , splitBy
     , chunksOf
 
     -- ** Breaking into lines and words
         -> Text                 -- ^ Replacement text
         -> Text                 -- ^ Input text
         -> Text
-replace s d = intercalate d . split s
+replace s d = intercalate d . splitOn s
 {-# INLINE replace #-}
 
 -- ----------------------------------------------------------------------------
 --
 -- Examples:
 --
--- > split "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
--- > split "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
--- > split "x"    "x"                == ["",""]
+-- > splitOn "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
+-- > splitOn "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
+-- > splitOn "x"    "x"                == ["",""]
 -- 
 -- and
 --
--- > intercalate s . split s         == id
--- > split (singleton c)             == splitBy (==c)
+-- > intercalate s . splitOn s         == id
+-- > splitOn (singleton c)             == split (==c)
 --
 -- In (unlikely) bad cases, this function's time complexity degrades
 -- towards /O(n*m)/.
-split :: Text -> Text -> [Text]
-split pat@(Text _ _ l) src@(Text arr off len)
-    | l <= 0          = emptyError "split"
-    | isSingleton pat = splitBy (== unsafeHead pat) src
+splitOn :: Text -> Text -> [Text]
+splitOn pat@(Text _ _ l) src@(Text arr off len)
+    | l <= 0          = emptyError "splitOn"
+    | isSingleton pat = split (== unsafeHead pat) src
     | otherwise       = go 0 (indices pat src)
   where
     go !s (x:xs) =  textP arr (s+off) (x-s) : go (x+l) xs
     go  s _      = [textP arr (s+off) (len-s)]
-{-# INLINE [1] split #-}
+{-# INLINE [1] splitOn #-}
 
 {-# RULES
-"TEXT split/singleton -> splitBy/==" [~1] forall c t.
-    split (singleton c) t = splitBy (==c) t
+"TEXT splitOn/singleton -> split/==" [~1] forall c t.
+    splitOn (singleton c) t = split (==c) t
   #-}
 
 -- | /O(n)/ Splits a 'Text' into components delimited by separators;
 -- the resulting components do not contain the separators.  Two adjacent
 -- separators result in an empty component in the output, e.g.:
 --
--- > splitBy (=='a') "aabbaca" == ["","","bb","c",""]
--- > splitBy (=='a') ""        == [""]
-splitBy :: (Char -> Bool) -> Text -> [Text]
-splitBy _ t@(Text _off _arr 0) = [t]
-splitBy p t = loop t
+-- > split (=='a') "aabbaca" == ["","","bb","c",""]
+-- > split (=='a') ""        == [""]
+split :: (Char -> Bool) -> Text -> [Text]
+split _ t@(Text _off _arr 0) = [t]
+split p t = loop t
     where loop s | null s'   = [l]
                  | otherwise = l : loop (unsafeTail s')
               where (l, s') = break p s
-{-# INLINE splitBy #-}
+{-# INLINE split #-}
 
 -- | /O(n)/ Splits a 'Text' into components of length @k@.  The last
 -- element may be shorter than the other chunks, depending on the

Data/Text/Lazy.hs

 
     -- ** Breaking into many substrings
     -- $split
+    , splitOn
     , split
-    , splitBy
     , chunksOf
     -- , breakSubstring
 
         -> Text                 -- ^ Replacement text
         -> Text                 -- ^ Input text
         -> Text
-replace s d = intercalate d . split s
+replace s d = intercalate d . splitOn s
 {-# INLINE replace #-}
 
 -- ----------------------------------------------------------------------------
 --
 -- Examples:
 --
--- > split "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
--- > split "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
--- > split "x"    "x"                == ["",""]
+-- > splitOn "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
+-- > splitOn "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
+-- > splitOn "x"    "x"                == ["",""]
 -- 
 -- and
 --
--- > intercalate s . split s         == id
--- > split (singleton c)             == splitBy (==c)
+-- > intercalate s . splitOn s         == id
+-- > splitOn (singleton c)             == split (==c)
 --
 -- This function is strict in its first argument, and lazy in its
 -- second.
 --
 -- In (unlikely) bad cases, this function's time complexity degrades
 -- towards /O(n*m)/.
-split :: Text                   -- ^ Text to split on
-      -> Text                   -- ^ Input text
-      -> [Text]
-split pat src
-    | null pat        = emptyError "split"
-    | isSingleton pat = splitBy (== head pat) src
+splitOn :: Text                 -- ^ Text to split on
+        -> Text                 -- ^ Input text
+        -> [Text]
+splitOn pat src
+    | null pat        = emptyError "splitOn"
+    | isSingleton pat = split (== head pat) src
     | otherwise       = go 0 (indices pat src) src
   where
     go  _ []     cs = [cs]
     go !i (x:xs) cs = let h :*: t = splitAtWord (x-i) cs
                       in  h : go (x+l) xs (dropWords l t)
     l = foldlChunks (\a (T.Text _ _ b) -> a + fromIntegral b) 0 pat
-{-# INLINE [1] split #-}
+{-# INLINE [1] splitOn #-}
 
 {-# RULES
-"LAZY TEXT split/singleton -> splitBy/==" [~1] forall c t.
-    split (singleton c) t = splitBy (==c) t
+"LAZY TEXT splitOn/singleton -> split/==" [~1] forall c t.
+    splitOn (singleton c) t = split (==c) t
   #-}
 
 -- | /O(n)/ Splits a 'Text' into components delimited by separators;
 -- the resulting components do not contain the separators.  Two adjacent
 -- separators result in an empty component in the output, e.g.:
 --
--- > splitBy (=='a') "aabbaca" == ["","","bb","c",""]
--- > splitBy (=='a') []        == [""]
-splitBy :: (Char -> Bool) -> Text -> [Text]
-splitBy _ Empty = [Empty]
-splitBy p (Chunk t0 ts0) = comb [] (T.splitBy p t0) ts0
+-- > split (=='a') "aabbaca" == ["","","bb","c",""]
+-- > split (=='a') ""        == [""]
+split :: (Char -> Bool) -> Text -> [Text]
+split _ Empty = [Empty]
+split p (Chunk t0 ts0) = comb [] (T.split p t0) ts0
   where comb acc (s:[]) Empty        = revChunks (s:acc) : []
-        comb acc (s:[]) (Chunk t ts) = comb (s:acc) (T.splitBy p t) ts
+        comb acc (s:[]) (Chunk t ts) = comb (s:acc) (T.split p t) ts
         comb acc (s:ss) ts           = revChunks (s:acc) : comb [] ss ts
-        comb _   []     _            = impossibleError "splitBy"
-{-# INLINE splitBy #-}
+        comb _   []     _            = impossibleError "split"
+{-# INLINE split #-}
 
 -- | /O(n)/ Splits a 'Text' into components of length @k@.  The last
 -- element may be shorter than the other chunks, depending on the
 -- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's
 -- representing white space.
 words :: Text -> [Text]
-words = L.filter (not . null) . splitBy isSpace
+words = L.filter (not . null) . split isSpace
 {-# INLINE words #-}
 
 -- | /O(n)/ Joins lines, after appending a terminating newline to

tests/Properties.hs

 tl_reverse        = L.reverse `eqP` (unpackS . TL.reverse)
 t_reverse_short n = L.reverse `eqP` (unpackS . S.reverse . shorten n . S.stream)
 
-t_replace s d     = (L.intercalate d . split s) `eqP`
+t_replace s d     = (L.intercalate d . splitOn s) `eqP`
                     (unpackS . T.replace (T.pack s) (T.pack d))
-tl_replace s d     = (L.intercalate d . split s) `eqP`
+tl_replace s d     = (L.intercalate d . splitOn s) `eqP`
                      (unpackS . TL.replace (TL.pack s) (TL.pack d))
 
-split :: (Eq a) => [a] -> [a] -> [[a]]
-split pat src0
+splitOn :: (Eq a) => [a] -> [a] -> [[a]]
+splitOn pat src0
     | l == 0    = error "empty"
     | otherwise = go src0
   where
 t_findCount s     = (L.length . T.breakOnAll s) `eq` T.count s
 tl_findCount s    = (L.genericLength . TL.breakOnAll s) `eq` TL.count s
 
-t_split_split s         = (T.split s `eq` Slow.split s) . T.intercalate s
-tl_split_split s        = ((TL.split (TL.fromStrict s) . TL.fromStrict) `eq`
-                           (map TL.fromStrict . T.split s)) . T.intercalate s
-t_split_i (NotEmpty t)  = id `eq` (T.intercalate t . T.split t)
-tl_split_i (NotEmpty t) = id `eq` (TL.intercalate t . TL.split t)
+t_splitOn_split s         = (T.splitOn s `eq` Slow.splitOn s) . T.intercalate s
+tl_splitOn_split s        = ((TL.splitOn (TL.fromStrict s) . TL.fromStrict) `eq`
+                           (map TL.fromStrict . T.splitOn s)) . T.intercalate s
+t_splitOn_i (NotEmpty t)  = id `eq` (T.intercalate t . T.splitOn t)
+tl_splitOn_i (NotEmpty t) = id `eq` (TL.intercalate t . TL.splitOn t)
 
-t_splitBy p       = splitBy p `eqP` (map unpackS . T.splitBy p)
-t_splitBy_count c = (L.length . T.splitBy (==c)) `eq`
-                    ((1+) . T.count (T.singleton c))
-t_splitBy_split c = T.splitBy (==c) `eq` T.split (T.singleton c)
-tl_splitBy p      = splitBy p `eqP` (map unpackS . TL.splitBy p)
+t_split p       = split p `eqP` (map unpackS . T.split p)
+t_split_count c = (L.length . T.split (==c)) `eq`
+                  ((1+) . T.count (T.singleton c))
+t_split_splitOn c = T.split (==c) `eq` T.splitOn (T.singleton c)
+tl_split p      = split p `eqP` (map unpackS . TL.split p)
 
-splitBy :: (a -> Bool) -> [a] -> [[a]]
-splitBy _ [] =  [[]]
-splitBy p xs = loop xs
+split :: (a -> Bool) -> [a] -> [[a]]
+split _ [] =  [[]]
+split p xs = loop xs
     where loop s | null s'   = [l]
                  | otherwise = l : loop (tail s')
               where (l, s') = break p s
     where l = L.length s
 
 t_findIndex p     = L.findIndex p `eqP` T.findIndex p
-t_count (NotEmpty t)  = (subtract 1 . L.length . T.split t) `eq` T.count t
-tl_count (NotEmpty t) = (subtract 1 . L.genericLength . TL.split t) `eq`
+t_count (NotEmpty t)  = (subtract 1 . L.length . T.splitOn t) `eq` T.count t
+tl_count (NotEmpty t) = (subtract 1 . L.genericLength . TL.splitOn t) `eq`
                         TL.count t
 t_zip s           = L.zip s `eqP` T.zip (packS s)
 tl_zip s          = L.zip s `eqP` TL.zip (packS s)
       testProperty "sl_filterCount" sl_filterCount,
       testProperty "t_findCount" t_findCount,
       testProperty "tl_findCount" tl_findCount,
-      testProperty "t_split_split" t_split_split,
-      testProperty "tl_split_split" tl_split_split,
-      testProperty "t_split_i" t_split_i,
-      testProperty "tl_split_i" tl_split_i,
-      testProperty "t_splitBy" t_splitBy,
-      testProperty "t_splitBy_count" t_splitBy_count,
-      testProperty "t_splitBy_split" t_splitBy_split,
-      testProperty "tl_splitBy" tl_splitBy,
+      testProperty "t_splitOn_split" t_splitOn_split,
+      testProperty "tl_splitOn_split" tl_splitOn_split,
+      testProperty "t_splitOn_i" t_splitOn_i,
+      testProperty "tl_splitOn_i" tl_splitOn_i,
+      testProperty "t_split" t_split,
+      testProperty "t_split_count" t_split_count,
+      testProperty "t_split_splitOn" t_split_splitOn,
+      testProperty "tl_split" tl_split,
       testProperty "t_chunksOf_same_lengths" t_chunksOf_same_lengths,
       testProperty "t_chunksOf_length" t_chunksOf_length,
       testProperty "tl_chunksOf" tl_chunksOf

tests/SlowFunctions.hs

 module SlowFunctions
     (
       indices
-    , split
+    , splitOn
     ) where
 
 import qualified Data.Text as T
            where t = Text harr (hoff+i) (hlen-i)
                  d = iter_ haystack i
 
-split :: T.Text                 -- ^ Text to split on
-      -> T.Text                 -- ^ Input text
-      -> [T.Text]
-split pat src0
-    | T.null pat  = error "split: empty"
-    | l == 1      = T.splitBy (== (unsafeHead pat)) src0
+splitOn :: T.Text               -- ^ Text to split on
+        -> T.Text               -- ^ Input text
+        -> [T.Text]
+splitOn pat src0
+    | T.null pat  = error "splitOn: empty"
+    | l == 1      = T.split (== (unsafeHead pat)) src0
     | otherwise   = go src0
   where
     l      = T.length pat
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.