Commits

Bryan O'Sullivan  committed df56fb1

Add commonPrefixes functions, and make lazy strip{Pre,Suf}fix faster

The old implemenations of stripPrefix and stripSuffix were placeholders
with terrible performance, but Sebastiaan Visser asked for them to
be improved ... so they are.

  • Participants
  • Parent commits 12804df

Comments (0)

Files changed (3)

File Data/Text.hs

     -- ** View patterns
     , stripPrefix
     , stripSuffix
+    , commonPrefixes
 
     -- * Searching
     , filter
 -------------------------------------------------------------------------------
 -- * View patterns
 
--- | /O(n)/ Returns the suffix of the second string if its prefix
--- matches the first.
+-- | /O(n)/ Return the suffix of the second string if its prefix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripPrefix "foo" "foobar" == Just "bar"
+-- > stripPrefix ""    "baz"    == Just "baz"
 -- > stripPrefix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- >
 -- > fnordLength :: Text -> Int
 -- > fnordLength (stripPrefix "fnord" -> Just suf) = T.length suf
--- > fnordLength _                              = -1
+-- > fnordLength _                                 = -1
 stripPrefix :: Text -> Text -> Maybe Text
 stripPrefix p@(Text _arr _off plen) t@(Text arr off len)
     | p `isPrefixOf` t = Just $! textP arr (off+plen) (len-plen)
     | otherwise        = Nothing
 
--- | /O(n)/ Returns the prefix of the second string if its suffix
--- matches the first.
+-- | /O(n)/ Find the longest non-empty common prefix of two strings
+-- and return it, along with the suffixes of each string at which they
+-- no longer match.
+--
+-- If the strings do not have a common prefix or either one is empty,
+-- this function returns 'Nothing'.
+--
+-- Examples:
+--
+-- > commonPrefixes "foobar" "fooquux" == Just ("foo","bar","quux")
+-- > commonPrefixes "veeble" "fetzer"  == Nothing
+-- > commonPrefixes "" "baz"           == Nothing
+commonPrefixes :: Text -> Text -> Maybe (Text,Text,Text)
+commonPrefixes t0@(Text arr0 off0 len0) t1@(Text arr1 off1 len1) = go 0 0
+  where
+    go !i !j | i < len0 && j < len1 && a == b = go (i+d0) (j+d1)
+             | i > 0     = Just (Text arr0 off0 i,
+                                 textP arr0 (off0+i) (len0-i),
+                                 textP arr1 (off1+j) (len1-j))
+             | otherwise = Nothing
+      where Iter a d0 = iter t0 i
+            Iter b d1 = iter t1 j
+
+-- | /O(n)/ Return the prefix of the second string if its suffix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripSuffix "bar" "foobar" == Just "foo"
+-- > stripSuffix ""    "baz"    == Just "baz"
 -- > stripSuffix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- >
 -- > quuxLength :: Text -> Int
 -- > quuxLength (stripSuffix "quux" -> Just pre) = T.length pre
--- > quuxLength _                             = -1
+-- > quuxLength _                                = -1
 stripSuffix :: Text -> Text -> Maybe Text
 stripSuffix p@(Text _arr _off plen) t@(Text arr off len)
     | p `isSuffixOf` t = Just $! textP arr off (len-plen)

File Data/Text/Lazy.hs

     -- ** View patterns
     , stripPrefix
     , stripSuffix
+    , commonPrefixes
 
     -- * Searching
     , filter
 import Prelude (Char, Bool(..), Maybe(..), String,
                 Eq(..), Ord(..), Ordering(..), Read(..), Show(..),
                 (&&), (||), (+), (-), (.), ($), (++),
-                div, error, flip, fromIntegral, not, otherwise)
+                div, error, flip, fmap, fromIntegral, not, otherwise)
 import qualified Prelude as P
 #if defined(HAVE_DEEPSEQ)
 import Control.DeepSeq (NFData(..))
 -------------------------------------------------------------------------------
 -- * View patterns
 
--- | /O(n)/ Returns the suffix of the second string if its prefix
--- matches the first.
+-- | /O(n)/ Return the suffix of the second string if its prefix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripPrefix "foo" "foobar" == Just "bar"
+-- > stripPrefix ""    "baz"    == Just "baz"
 -- > stripPrefix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- GHC, as follows:
 --
 -- > {-# LANGUAGE ViewPatterns #-}
--- > import Data.Text as T
+-- > import Data.Text.Lazy as T
 -- >
 -- > fnordLength :: Text -> Int
 -- > fnordLength (stripPrefix "fnord" -> Just suf) = T.length suf
--- > fnordLength _                              = -1
+-- > fnordLength _                                 = -1
 stripPrefix :: Text -> Text -> Maybe Text
--- Yes, this could be much more efficient.
 stripPrefix p t
-    | p `isPrefixOf` t = Just (drop (length p) t)
-    | otherwise        = Nothing
+    | null p    = Just t
+    | otherwise = case commonPrefixes p t of
+                    Just (_,c,r) | null c -> Just r
+                    _                     -> Nothing
 
--- | /O(n)/ Returns the prefix of the second string if its suffix
--- matches the first.
+-- | /O(n)/ Find the longest non-empty common prefix of two strings
+-- and return it, along with the suffixes of each string at which they
+-- no longer match.
+--
+-- If the strings do not have a common prefix or either one is empty,
+-- this function returns 'Nothing'.
+--
+-- Examples:
+--
+-- > commonPrefixes "foobar" "fooquux" == Just ("foo","bar","quux")
+-- > commonPrefixes "veeble" "fetzer"  == Nothing
+-- > commonPrefixes "" "baz"           == Nothing
+commonPrefixes :: Text -> Text -> Maybe (Text,Text,Text)
+commonPrefixes Empty _ = Nothing
+commonPrefixes _ Empty = Nothing
+commonPrefixes a0 b0   = Just (go a0 b0 [])
+  where
+    go t0@(Chunk x xs) t1@(Chunk y ys) ps
+        = case T.commonPrefixes x y of
+            Just (p,a,b)
+              | T.null a  -> go xs (chunk b ys) (p:ps)
+              | T.null b  -> go (chunk a xs) ys (p:ps)
+              | otherwise -> (fromChunks (L.reverse (p:ps)),chunk a xs, chunk b ys)
+            Nothing       -> (fromChunks (L.reverse ps),t0,t1)
+    go t0 t1 ps = (fromChunks (L.reverse ps),t0,t1)
+
+-- | /O(n)/ Return the prefix of the second string if its suffix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripSuffix "bar" "foobar" == Just "foo"
+-- > stripSuffix ""    "baz"    == Just "baz"
 -- > stripSuffix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- GHC, as follows:
 --
 -- > {-# LANGUAGE ViewPatterns #-}
--- > import Data.Text as T
+-- > import Data.Text.Lazy as T
 -- >
 -- > quuxLength :: Text -> Int
 -- > quuxLength (stripSuffix "quux" -> Just pre) = T.length pre
--- > quuxLength _                             = -1
+-- > quuxLength _                                = -1
 stripSuffix :: Text -> Text -> Maybe Text
--- Yes, this could be much more efficient.
-stripSuffix p t
-    | p `isSuffixOf` t = Just (take (length t - length p) t)
-    | otherwise        = Nothing
+stripSuffix p t = reverse `fmap` stripPrefix (reverse p) (reverse t)
 
 -- | /O(n)/ 'filter', applied to a predicate and a 'Text',
 -- returns a 'Text' containing those characters that satisfy the

File tests/Properties.hs

 t_stripSuffix s      = (fmap packS . stripSuffix s) `eqP` T.stripSuffix (packS s)
 tl_stripSuffix s     = (fmap packS . stripSuffix s) `eqP` TL.stripSuffix (packS s)
 
+commonPrefixes a0@(_:_) b0@(_:_) = Just (go a0 b0 [])
+    where go (a:as) (b:bs) ps
+              | a == b = go as bs (a:ps)
+          go as bs ps  = (reverse ps,as,bs)
+commonPrefixes _ _ = Nothing
+
+t_commonPrefixes a b (NonEmpty p)
+    = commonPrefixes pa pb ==
+      repack `fmap` T.commonPrefixes (packS pa) (packS pb)
+  where repack (x,y,z) = (unpackS x,unpackS y,unpackS z)
+        pa = p ++ a
+        pb = p ++ b
+
+tl_commonPrefixes a b (NonEmpty p)
+    = commonPrefixes pa pb ==
+      repack `fmap` TL.commonPrefixes (packS pa) (packS pb)
+  where repack (x,y,z) = (unpackS x,unpackS y,unpackS z)
+        pa = p ++ a
+        pb = p ++ b
+
 sf_elem p c       = (L.elem c . L.filter p) `eqP` (S.elem c . S.filter p)
 sf_filter q p     = (L.filter p . L.filter q) `eqP`
                     (unpackS . S.filter p . S.filter q)
       testProperty "t_stripPrefix" t_stripPrefix,
       testProperty "tl_stripPrefix" tl_stripPrefix,
       testProperty "t_stripSuffix" t_stripSuffix,
-      testProperty "tl_stripSuffix" tl_stripSuffix
+      testProperty "tl_stripSuffix" tl_stripSuffix,
+      testProperty "t_commonPrefixes" t_commonPrefixes,
+      testProperty "tl_commonPrefixes" tl_commonPrefixes
     ]
   ],