Commits

Bryan O'Sullivan committed df56fb1

Add commonPrefixes functions, and make lazy strip{Pre,Suf}fix faster

The old implemenations of stripPrefix and stripSuffix were placeholders
with terrible performance, but Sebastiaan Visser asked for them to
be improved ... so they are.

Comments (0)

Files changed (3)

     -- ** View patterns
     , stripPrefix
     , stripSuffix
+    , commonPrefixes
 
     -- * Searching
     , filter
 -------------------------------------------------------------------------------
 -- * View patterns
 
--- | /O(n)/ Returns the suffix of the second string if its prefix
--- matches the first.
+-- | /O(n)/ Return the suffix of the second string if its prefix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripPrefix "foo" "foobar" == Just "bar"
+-- > stripPrefix ""    "baz"    == Just "baz"
 -- > stripPrefix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- >
 -- > fnordLength :: Text -> Int
 -- > fnordLength (stripPrefix "fnord" -> Just suf) = T.length suf
--- > fnordLength _                              = -1
+-- > fnordLength _                                 = -1
 stripPrefix :: Text -> Text -> Maybe Text
 stripPrefix p@(Text _arr _off plen) t@(Text arr off len)
     | p `isPrefixOf` t = Just $! textP arr (off+plen) (len-plen)
     | otherwise        = Nothing
 
--- | /O(n)/ Returns the prefix of the second string if its suffix
--- matches the first.
+-- | /O(n)/ Find the longest non-empty common prefix of two strings
+-- and return it, along with the suffixes of each string at which they
+-- no longer match.
+--
+-- If the strings do not have a common prefix or either one is empty,
+-- this function returns 'Nothing'.
+--
+-- Examples:
+--
+-- > commonPrefixes "foobar" "fooquux" == Just ("foo","bar","quux")
+-- > commonPrefixes "veeble" "fetzer"  == Nothing
+-- > commonPrefixes "" "baz"           == Nothing
+commonPrefixes :: Text -> Text -> Maybe (Text,Text,Text)
+commonPrefixes t0@(Text arr0 off0 len0) t1@(Text arr1 off1 len1) = go 0 0
+  where
+    go !i !j | i < len0 && j < len1 && a == b = go (i+d0) (j+d1)
+             | i > 0     = Just (Text arr0 off0 i,
+                                 textP arr0 (off0+i) (len0-i),
+                                 textP arr1 (off1+j) (len1-j))
+             | otherwise = Nothing
+      where Iter a d0 = iter t0 i
+            Iter b d1 = iter t1 j
+
+-- | /O(n)/ Return the prefix of the second string if its suffix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripSuffix "bar" "foobar" == Just "foo"
+-- > stripSuffix ""    "baz"    == Just "baz"
 -- > stripSuffix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- >
 -- > quuxLength :: Text -> Int
 -- > quuxLength (stripSuffix "quux" -> Just pre) = T.length pre
--- > quuxLength _                             = -1
+-- > quuxLength _                                = -1
 stripSuffix :: Text -> Text -> Maybe Text
 stripSuffix p@(Text _arr _off plen) t@(Text arr off len)
     | p `isSuffixOf` t = Just $! textP arr off (len-plen)

Data/Text/Lazy.hs

     -- ** View patterns
     , stripPrefix
     , stripSuffix
+    , commonPrefixes
 
     -- * Searching
     , filter
 import Prelude (Char, Bool(..), Maybe(..), String,
                 Eq(..), Ord(..), Ordering(..), Read(..), Show(..),
                 (&&), (||), (+), (-), (.), ($), (++),
-                div, error, flip, fromIntegral, not, otherwise)
+                div, error, flip, fmap, fromIntegral, not, otherwise)
 import qualified Prelude as P
 #if defined(HAVE_DEEPSEQ)
 import Control.DeepSeq (NFData(..))
 -------------------------------------------------------------------------------
 -- * View patterns
 
--- | /O(n)/ Returns the suffix of the second string if its prefix
--- matches the first.
+-- | /O(n)/ Return the suffix of the second string if its prefix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripPrefix "foo" "foobar" == Just "bar"
+-- > stripPrefix ""    "baz"    == Just "baz"
 -- > stripPrefix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- GHC, as follows:
 --
 -- > {-# LANGUAGE ViewPatterns #-}
--- > import Data.Text as T
+-- > import Data.Text.Lazy as T
 -- >
 -- > fnordLength :: Text -> Int
 -- > fnordLength (stripPrefix "fnord" -> Just suf) = T.length suf
--- > fnordLength _                              = -1
+-- > fnordLength _                                 = -1
 stripPrefix :: Text -> Text -> Maybe Text
--- Yes, this could be much more efficient.
 stripPrefix p t
-    | p `isPrefixOf` t = Just (drop (length p) t)
-    | otherwise        = Nothing
+    | null p    = Just t
+    | otherwise = case commonPrefixes p t of
+                    Just (_,c,r) | null c -> Just r
+                    _                     -> Nothing
 
--- | /O(n)/ Returns the prefix of the second string if its suffix
--- matches the first.
+-- | /O(n)/ Find the longest non-empty common prefix of two strings
+-- and return it, along with the suffixes of each string at which they
+-- no longer match.
+--
+-- If the strings do not have a common prefix or either one is empty,
+-- this function returns 'Nothing'.
+--
+-- Examples:
+--
+-- > commonPrefixes "foobar" "fooquux" == Just ("foo","bar","quux")
+-- > commonPrefixes "veeble" "fetzer"  == Nothing
+-- > commonPrefixes "" "baz"           == Nothing
+commonPrefixes :: Text -> Text -> Maybe (Text,Text,Text)
+commonPrefixes Empty _ = Nothing
+commonPrefixes _ Empty = Nothing
+commonPrefixes a0 b0   = Just (go a0 b0 [])
+  where
+    go t0@(Chunk x xs) t1@(Chunk y ys) ps
+        = case T.commonPrefixes x y of
+            Just (p,a,b)
+              | T.null a  -> go xs (chunk b ys) (p:ps)
+              | T.null b  -> go (chunk a xs) ys (p:ps)
+              | otherwise -> (fromChunks (L.reverse (p:ps)),chunk a xs, chunk b ys)
+            Nothing       -> (fromChunks (L.reverse ps),t0,t1)
+    go t0 t1 ps = (fromChunks (L.reverse ps),t0,t1)
+
+-- | /O(n)/ Return the prefix of the second string if its suffix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripSuffix "bar" "foobar" == Just "foo"
+-- > stripSuffix ""    "baz"    == Just "baz"
 -- > stripSuffix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- GHC, as follows:
 --
 -- > {-# LANGUAGE ViewPatterns #-}
--- > import Data.Text as T
+-- > import Data.Text.Lazy as T
 -- >
 -- > quuxLength :: Text -> Int
 -- > quuxLength (stripSuffix "quux" -> Just pre) = T.length pre
--- > quuxLength _                             = -1
+-- > quuxLength _                                = -1
 stripSuffix :: Text -> Text -> Maybe Text
--- Yes, this could be much more efficient.
-stripSuffix p t
-    | p `isSuffixOf` t = Just (take (length t - length p) t)
-    | otherwise        = Nothing
+stripSuffix p t = reverse `fmap` stripPrefix (reverse p) (reverse t)
 
 -- | /O(n)/ 'filter', applied to a predicate and a 'Text',
 -- returns a 'Text' containing those characters that satisfy the

tests/Properties.hs

 t_stripSuffix s      = (fmap packS . stripSuffix s) `eqP` T.stripSuffix (packS s)
 tl_stripSuffix s     = (fmap packS . stripSuffix s) `eqP` TL.stripSuffix (packS s)
 
+commonPrefixes a0@(_:_) b0@(_:_) = Just (go a0 b0 [])
+    where go (a:as) (b:bs) ps
+              | a == b = go as bs (a:ps)
+          go as bs ps  = (reverse ps,as,bs)
+commonPrefixes _ _ = Nothing
+
+t_commonPrefixes a b (NonEmpty p)
+    = commonPrefixes pa pb ==
+      repack `fmap` T.commonPrefixes (packS pa) (packS pb)
+  where repack (x,y,z) = (unpackS x,unpackS y,unpackS z)
+        pa = p ++ a
+        pb = p ++ b
+
+tl_commonPrefixes a b (NonEmpty p)
+    = commonPrefixes pa pb ==
+      repack `fmap` TL.commonPrefixes (packS pa) (packS pb)
+  where repack (x,y,z) = (unpackS x,unpackS y,unpackS z)
+        pa = p ++ a
+        pb = p ++ b
+
 sf_elem p c       = (L.elem c . L.filter p) `eqP` (S.elem c . S.filter p)
 sf_filter q p     = (L.filter p . L.filter q) `eqP`
                     (unpackS . S.filter p . S.filter q)
       testProperty "t_stripPrefix" t_stripPrefix,
       testProperty "tl_stripPrefix" tl_stripPrefix,
       testProperty "t_stripSuffix" t_stripSuffix,
-      testProperty "tl_stripSuffix" tl_stripSuffix
+      testProperty "tl_stripSuffix" tl_stripSuffix,
+      testProperty "t_commonPrefixes" t_commonPrefixes,
+      testProperty "tl_commonPrefixes" tl_commonPrefixes
     ]
   ],