1. Bryan O'Sullivan
  2. text

Commits

Bryan O'Sullivan  committed d5eaf5b

Expose and fix a bug in lazy split.

  • Participants
  • Parent commits 4230af0
  • Branches default

Comments (0)

Files changed (2)

File Data/Text/Lazy.hs

View file
  • Ignore whitespace
     unstream (S.drop n (stream t)) = drop n t
   #-}
 
+-- | /O(n)/ 'dropWords' @n@ returns the suffix with @n@ 'Word16'
+-- values dropped, or the empty 'Text' if @n@ is greater than the
+-- number of 'Word16' values present.
+dropWords :: Int64 -> Text -> Text
+dropWords i t0
+    | i <= 0    = t0
+    | otherwise = drop' i t0
+  where drop' 0 ts           = ts
+        drop' _ Empty        = Empty
+        drop' n (Chunk (T.Text arr off len) ts)
+            | n < len'  = chunk (textP arr (off+n') (len-n')) ts
+            | otherwise = drop' (n - len') ts
+            where len'  = fromIntegral len
+                  n'    = fromIntegral n
+
 -- | /O(n)/ 'takeWhile', applied to a predicate @p@ and a 'Text', returns
 -- the longest prefix (possibly empty) of elements that satisfy @p@.
 -- This function is subject to array fusion.
     | otherwise       = go 0 (indices pat src) src
   where
     go  _ []     cs = [cs]
-    go !s (x:xs) cs = let h :*: t = splitAtWord (x-s) cs
-                      in  h : go (s+x) xs t
+    go !i (x:xs) cs = let h :*: t = splitAtWord (x-i) cs
+                      in  h : go (x+l) xs (dropWords l t)
+    l = foldlChunks (\a (T.Text _ _ b) -> a + fromIntegral b) 0 pat
 {-# INLINE [1] split #-}
 
 {-# RULES

File tests/Properties.hs

View file
  • Ignore whitespace
 t_findSplit s           = T.split s `eq` splitty
   where splitty t       = case T.find s t of
                             (x,xs) -> x : L.map (T.drop (T.length s) . fst) xs
-t_split_split s         = T.split s `eq` Slow.split s
+t_split_split s         = unsquare ((T.split s `eq` Slow.split s) .
+                                    T.intercalate s)
+tl_split_split s        = unsquare (((TL.split (chunkify s) . chunkify) `eq`
+                                     (map chunkify . T.split s)) .
+                                    T.intercalate s)
 t_split_i (NotEmpty t)  = id `eq` (T.intercalate t . T.split t)
 tl_split_i (NotEmpty t) = id `eq` (TL.intercalate t . TL.split t)
 
 t_chunksOf_length k t = len == T.length t || (k <= 0 && len == 0)
   where len = L.sum . L.map T.length $ T.chunksOf k t
 
-tl_chunksOf k = T.chunksOf k `eq` (map (T.concat . TL.toChunks) . TL.chunksOf (fromIntegral k) . TL.fromChunks . (:[]))
+chunkify = TL.fromChunks . (:[])
+
+tl_chunksOf k = T.chunksOf k `eq` (map (T.concat . TL.toChunks) . TL.chunksOf (fromIntegral k) . chunkify)
 
 t_lines           = L.lines       `eqP` (map unpackS . T.lines)
 tl_lines          = L.lines       `eqP` (map unpackS . TL.lines)
     testGroup "breaking many" [
       testProperty "t_findSplit" t_findSplit,
       testProperty "t_split_split" t_split_split,
+      testProperty "tl_split_split" tl_split_split,
       testProperty "t_split_i" t_split_i,
       testProperty "tl_split_i" tl_split_i,
       testProperty "t_splitBy" t_splitBy,