-- Fast substring search for lazy 'Text', based on work by Boyer,
-- Moore, Horspool, Sunday, and Lundh. Adapted from the strict
--- /Note/: this is currently too strict!
-- | /O(n+m)/ Find the offsets of all non-overlapping occurrences of
-- @needle@ within @haystack@.
+-- This function is strict in @needle@, and lazy (as far as possible)
+-- in the chunks of @haystack@.
-- In (unlikely) bad cases, this algorithm's complexity degrades
indices :: Text -- ^ Substring to search for (@needle@)
-> Text -- ^ Text to search in (@haystack@)
-indices needle@(Chunk n ns) haystack@(Chunk k ks)
- | nlen <= 0 || ldiff < 0 = 
- | nlen == 1 = scanOne (nindex 0) 0 k ks
- | otherwise = scan 0 0 k ks
+indices needle@(Chunk n ns) _haystack@(Chunk k ks)
+ | nlen == 1 = scanOne (nindex 0) 0 k ks
+ | otherwise = scan 0 0 k ks
scan !g !i x@(T.Text _ _ l) xs
- Chunk y ys -> scan g (i-m) y ys
- | c == z && candidateMatch 0 = g : scan (g+nlen) (i+nlen) x xs
- | otherwise = scan (g+delta) (i+delta) x xs
+ Chunk y ys -> scan g (i-m) y ys
+ | lackingHay (i + nlen) x xs = 
+ | c == z && candidateMatch 0 = g : scan (g+nlen) (i+nlen) x xs
+ | otherwise = scan (g+delta) (i+delta) x xs
| hindex (i+j) /= nindex j = False
| otherwise = candidateMatch (j+1)
- ldiff = wordLength haystack - nlen
| on == c = i + fromIntegral h : go (h+1)
where on = A.unsafeIndex oarr (ooff+h)
+ -- | Check whether an attempt to index into the haystack at the
+ -- given offset will fail.
+ go p (T.Text _ _ l) ps = p' < q && case ps of
+ Chunk r rs -> go p' r rs
+ where p' = p + fromIntegral l
-- | Fast index into a partly unpacked 'Text'. We take into account
index (T.Text arr off len) xs i
| j < len = A.unsafeIndex arr (off+j)
- Empty | j == len -> 0 -- out of bounds, but legal
- | otherwise -> emptyError "index"
+ -- out of bounds, but legal
+ -- should never happen, due to lackingHay above
+ | otherwise -> emptyError "index"
Chunk c cs -> index c cs (i-fromIntegral len)