Bryan O'Sullivan avatar Bryan O'Sullivan committed a29e369

Implement breakSubstring

Comments (0)

Files changed (2)

     -- ** Breaking into many substrings
     , split
     , splitWith
-    -- , breakSubstring
+    , breakSubstring
 
     -- ** Breaking into lines and words
     , lines
 find p t = S.find p (stream t)
 {-# INLINE find #-}
 
+-- | Split a string at the first place where a given substring occurs.
+-- The result is a pair: everything in front of the match, and the
+-- remainder of the string beginning with the match itself.
+--
+-- When the end of the input is reached without finding the substring,
+-- the whole input is returned as the first component and the second
+-- component is 'empty'.
+--
+-- The following relationships hold:
+--
+-- > break (== c) l == breakSubstring (singleton c) l
+--
+-- and:
+--
+-- > findSubstring s l ==
+-- >    if null s then Just 0
+-- >              else case breakSubstring s l of
+-- >                       (x,y) | null y    -> Nothing
+-- >                             | otherwise -> Just (length x)
+--
+-- For example, to tokenise a string, dropping delimiters (for a
+-- non-empty delimiter @x@):
+--
+-- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
+-- >     where (h,t) = breakSubstring x y
+--
+-- To skip to the first occurrence of a string:
+--
+-- > snd (breakSubstring x y)
+--
+-- To take the parts of a string before a delimiter:
+--
+-- > fst (breakSubstring x y)
+--
+breakSubstring :: Text -- ^ String to search for
+               -> Text -- ^ String to search in
+               -> (Text,Text) -- ^ Head and tail of string broken at substring
+breakSubstring needle haystack = go 0 haystack
+  where
+    -- @offset@ counts how many leading characters of @haystack@ have
+    -- been stepped over so far; @rest@ is the suffix still to examine.
+    go !offset !rest
+        | null rest                = (haystack,empty)   -- ran off the end: no match
+        | needle `isPrefixOf` rest = (take offset haystack,rest)
+        | otherwise                = go (offset+1) (tail rest)
+
+
 -- | /O(n)/ 'filter', applied to a predicate and a 'Text',
 -- returns a 'Text' containing those characters that satisfy the
 -- predicate.

tests/Properties.hs

           whale xs = xs
 -}
 
+-- 'T.isInfixOf' must agree with 'T.breakSubstring': a pattern is reported
+-- as contained exactly when it is empty or when breaking on it leaves a
+-- non-empty second component.
+--
+-- The parentheses around the right-hand side are required: '==' binds
+-- tighter than '||', so without them the property would read
+-- @(isInfixOf s l == null s) || found@ and would still pass when
+-- 'T.breakSubstring' reports a match that 'T.isInfixOf' denies.
+prop_breakSubstring_isInfixOf s l
+                     = T.isInfixOf s l ==
+                       (T.null s || (not . T.null . snd $ T.breakSubstring s l))
+-- Breaking on a one-character pattern is compared, via 'eqP', with the
+-- list-level 'L.break' on that same character.
+prop_breakSubstringC c = viaList `eqP` viaText
+  where
+    viaList = L.break (== c)
+    viaText = unpack2 . T.breakSubstring (T.singleton c)
+
 prop_lines           = L.lines       `eqP` (map unpack . T.lines)
 prop_words           = L.words       `eqP` (map unpack . T.words)
 prop_unlines         = L.unlines     `eq`  (unpack . T.unlines . map pack)
 
   ("prop_split_i", mytest prop_split_i),
 --("prop_splitWith", mytest prop_splitWith),
+  ("prop_breakSubstringC", mytest prop_breakSubstringC),
+  ("prop_breakSubstring_isInfixOf", mytest prop_breakSubstring_isInfixOf),
 
   ("prop_lines", mytest prop_lines),
   ("prop_words", mytest prop_words),
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.