Commits

Bryan O'Sullivan committed cfa31e2

Implement lazy find.

  • Participants
  • Parent commits 367ca64

Comments (0)

Files changed (3)

File Data/Text.hs

 -- > ==> ("", [])
 -- > find "/" "a/b/c/d"
 -- > ==> ("a", [("/b","/b/c/d"), ("/c","/c/d"), ("/d","/d")])
+--
+-- In (unlikely) bad cases, this function's time complexity degrades
+-- towards /O(n*m)/.
 find :: Text -> Text -> (Text, [(Text, Text)])
 find pat src@(Text arr off len)
     | null pat  = emptyError "find"

File Data/Text/Lazy.hs

 
     -- * Searching
     , filter
+    , find
     , findBy
     , partitionBy
 
                     (x:_) -> let h :*: t = splitAtWord x src
                              in  (h, t)
 
+-- | /O(n+m)/ Find all non-overlapping instances of @needle@ in
+-- @haystack@.  The first element of the returned pair is the prefix
+-- of @haystack@ prior to any matches of @needle@.  The second is a
+-- list of pairs.
+--
+-- The first element of each pair in the list is a span from the
+-- beginning of a match to the beginning of the next match, while the
+-- second is a span from the beginning of the match to the end of the
+-- input.
+--
+-- Examples:
+--
+-- > find "::" ""
+-- > ==> ("", [])
+-- > find "/" "a/b/c/d"
+-- > ==> ("a", [("/b","/b/c/d"), ("/c","/c/d"), ("/d","/d")])
+--
+-- This function is strict in its first argument, and lazy in its
+-- second.
+--
+-- In (unlikely) bad cases, this function's time complexity degrades
+-- towards /O(n*m)/.
+find :: Text -> Text -> (Text, [(Text, Text)])
+find pat src
+    | null pat  = emptyError "find"
+    | otherwise = case indices pat src of
+                    []     -> (src, [])
+                    (x:xs) -> let h :*: t = splitAtWord x src
+                              in (h, go x xs t)
+  where
+    go !i (x:xs) cs = let h :*: t = splitAtWord (x-i) cs
+                      in (h, cs) : go x xs t
+    go  _ _      cs = [(cs,cs)]
+
 -- | /O(n)/ 'breakBy' is like 'spanBy', but the prefix returned is over
 -- elements that fail the predicate @p@.
 breakBy :: (Char -> Bool) -> Text -> (Text, Text)

File tests/Properties.hs

 t_tails           = L.tails       `eqP` (map unpackS . T.tails)
 tl_tails          = L.tails       `eqP` (map unpackS . TL.tails)
 
-t_findSplit s           = T.split s `eq` splitty
-  where splitty t       = case T.find s t of
+t_findSplit s t         = (T.split s `eq` splitty) u
+  where splitty v       = case T.find s v  of
                             (x,xs) -> x : L.map (T.drop (T.length s) . fst) xs
+        u = T.concat [t,s,t]
+tl_findSplit s t        = (TL.split s `eq` splitty) u
+  where splitty v       = case TL.find s v of
+                            (x,xs) -> x : L.map (TL.drop (TL.length s) . fst) xs
+        u = TL.concat [t,s,t]
 t_split_split s         = unsquare ((T.split s `eq` Slow.split s) .
                                     T.intercalate s)
 tl_split_split s        = unsquare (((TL.split (chunkify s) . chunkify) `eq`
 
     testGroup "breaking many" [
       testProperty "t_findSplit" t_findSplit,
+      testProperty "tl_findSplit" tl_findSplit,
       testProperty "t_split_split" t_split_split,
       testProperty "tl_split_split" tl_split_split,
       testProperty "t_split_i" t_split_i,