# HG changeset patch
# User Bryan O'Sullivan
# Date 1255066709 0
# Node ID cfa31e2022ee12db3b91a14a33a7f99431d38165
# Parent 367ca64c00f63eacc8767db557fe1cc9bd09bda0
Implement lazy find.
diff --git a/Data/Text.hs b/Data/Text.hs
--- a/Data/Text.hs
+++ b/Data/Text.hs
@@ -1036,6 +1036,9 @@
-- > ==> ("", [])
-- > find "/" "a/b/c/d"
-- > ==> ("a", [("/b","/b/c/d"), ("/c","/c/d"), ("/d","/d")])
+--
+-- In (unlikely) bad cases, this function's time complexity degrades
+-- towards /O(n*m)/.
find :: Text -> Text -> (Text, [(Text, Text)])
find pat src@(Text arr off len)
| null pat = emptyError "find"
diff --git a/Data/Text/Lazy.hs b/Data/Text/Lazy.hs
--- a/Data/Text/Lazy.hs
+++ b/Data/Text/Lazy.hs
@@ -145,6 +145,7 @@
-- * Searching
, filter
+ , find
, findBy
, partitionBy
@@ -873,6 +874,40 @@
(x:_) -> let h :*: t = splitAtWord x src
in (h, t)
+-- | /O(n+m)/ Find all non-overlapping instances of @needle@ in
+-- @haystack@. The first element of the returned pair is the prefix
+-- of @haystack@ prior to any matches of @needle@. The second is a
+-- list of pairs.
+--
+-- The first element of each pair in the list is a span from the
+-- beginning of a match to the beginning of the next match, while the
+-- second is a span from the beginning of the match to the end of the
+-- input.
+--
+-- Examples:
+--
+-- > find "::" ""
+-- > ==> ("", [])
+-- > find "/" "a/b/c/d"
+-- > ==> ("a", [("/b","/b/c/d"), ("/c","/c/d"), ("/d","/d")])
+--
+-- This function is strict in its first argument, and lazy in its
+-- second.
+--
+-- In (unlikely) bad cases, this function's time complexity degrades
+-- towards /O(n*m)/.
+find :: Text -> Text -> (Text, [(Text, Text)])
+find pat src
+ | null pat = emptyError "find"
+ | otherwise = case indices pat src of
+ [] -> (src, [])
+ (x:xs) -> let h :*: t = splitAtWord x src
+ in (h, go x xs t)
+ where
+ go !i (x:xs) cs = let h :*: t = splitAtWord (x-i) cs
+ in (h, cs) : go x xs t
+ go _ _ cs = [(cs,cs)]
+
-- | /O(n)/ 'breakBy' is like 'spanBy', but the prefix returned is over
-- elements that fail the predicate @p@.
breakBy :: (Char -> Bool) -> Text -> (Text, Text)
diff --git a/tests/Properties.hs b/tests/Properties.hs
--- a/tests/Properties.hs
+++ b/tests/Properties.hs
@@ -408,9 +408,14 @@
t_tails = L.tails `eqP` (map unpackS . T.tails)
tl_tails = L.tails `eqP` (map unpackS . TL.tails)
-t_findSplit s = T.split s `eq` splitty
- where splitty t = case T.find s t of
+t_findSplit s t = (T.split s `eq` splitty) u
+ where splitty v = case T.find s v of
(x,xs) -> x : L.map (T.drop (T.length s) . fst) xs
+ u = T.concat [t,s,t]
+tl_findSplit s t = (TL.split s `eq` splitty) u
+ where splitty v = case TL.find s v of
+ (x,xs) -> x : L.map (TL.drop (TL.length s) . fst) xs
+ u = TL.concat [t,s,t]
t_split_split s = unsquare ((T.split s `eq` Slow.split s) .
T.intercalate s)
tl_split_split s = unsquare (((TL.split (chunkify s) . chunkify) `eq`
@@ -798,6 +803,7 @@
testGroup "breaking many" [
testProperty "t_findSplit" t_findSplit,
+ testProperty "tl_findSplit" tl_findSplit,
testProperty "t_split_split" t_split_split,
testProperty "tl_split_split" tl_split_split,
testProperty "t_split_i" t_split_i,