Bryan O'Sullivan avatar Bryan O'Sullivan committed df56fb1

Add commonPrefixes functions, and make lazy strip{Pre,Suf}fix faster

The old implemenations of stripPrefix and stripSuffix were placeholders
with terrible performance, but Sebastiaan Visser asked for them to
be improved ... so they are.

Comments (0)

Files changed (3)

     -- ** View patterns
     , stripPrefix
     , stripSuffix
+    , commonPrefixes
 
     -- * Searching
     , filter
 -------------------------------------------------------------------------------
 -- * View patterns
 
--- | /O(n)/ Returns the suffix of the second string if its prefix
--- matches the first.
+-- | /O(n)/ Return the suffix of the second string if its prefix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripPrefix "foo" "foobar" == Just "bar"
+-- > stripPrefix ""    "baz"    == Just "baz"
 -- > stripPrefix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- >
 -- > fnordLength :: Text -> Int
 -- > fnordLength (stripPrefix "fnord" -> Just suf) = T.length suf
--- > fnordLength _                              = -1
+-- > fnordLength _                                 = -1
 stripPrefix :: Text -> Text -> Maybe Text
 stripPrefix p@(Text _arr _off plen) t@(Text arr off len)
     | p `isPrefixOf` t = Just $! textP arr (off+plen) (len-plen)
     | otherwise        = Nothing
 
--- | /O(n)/ Returns the prefix of the second string if its suffix
--- matches the first.
+-- | /O(n)/ Find the longest non-empty common prefix of two strings
+-- and return it, along with the suffixes of each string at which they
+-- no longer match.
+--
+-- If the strings do not have a common prefix or either one is empty,
+-- this function returns 'Nothing'.
+--
+-- Examples:
+--
+-- > commonPrefixes "foobar" "fooquux" == Just ("foo","bar","quux")
+-- > commonPrefixes "veeble" "fetzer"  == Nothing
+-- > commonPrefixes "" "baz"           == Nothing
+commonPrefixes :: Text -> Text -> Maybe (Text,Text,Text)
+commonPrefixes t0@(Text arr0 off0 len0) t1@(Text arr1 off1 len1) = go 0 0
+  where
+    go !i !j | i < len0 && j < len1 && a == b = go (i+d0) (j+d1)
+             | i > 0     = Just (Text arr0 off0 i,
+                                 textP arr0 (off0+i) (len0-i),
+                                 textP arr1 (off1+j) (len1-j))
+             | otherwise = Nothing
+      where Iter a d0 = iter t0 i
+            Iter b d1 = iter t1 j
+
+-- | /O(n)/ Return the prefix of the second string if its suffix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripSuffix "bar" "foobar" == Just "foo"
+-- > stripSuffix ""    "baz"    == Just "baz"
 -- > stripSuffix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- >
 -- > quuxLength :: Text -> Int
 -- > quuxLength (stripSuffix "quux" -> Just pre) = T.length pre
--- > quuxLength _                             = -1
+-- > quuxLength _                                = -1
 stripSuffix :: Text -> Text -> Maybe Text
 stripSuffix p@(Text _arr _off plen) t@(Text arr off len)
     | p `isSuffixOf` t = Just $! textP arr off (len-plen)

Data/Text/Lazy.hs

     -- ** View patterns
     , stripPrefix
     , stripSuffix
+    , commonPrefixes
 
     -- * Searching
     , filter
 import Prelude (Char, Bool(..), Maybe(..), String,
                 Eq(..), Ord(..), Ordering(..), Read(..), Show(..),
                 (&&), (||), (+), (-), (.), ($), (++),
-                div, error, flip, fromIntegral, not, otherwise)
+                div, error, flip, fmap, fromIntegral, not, otherwise)
 import qualified Prelude as P
 #if defined(HAVE_DEEPSEQ)
 import Control.DeepSeq (NFData(..))
 -------------------------------------------------------------------------------
 -- * View patterns
 
--- | /O(n)/ Returns the suffix of the second string if its prefix
--- matches the first.
+-- | /O(n)/ Return the suffix of the second string if its prefix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripPrefix "foo" "foobar" == Just "bar"
+-- > stripPrefix ""    "baz"    == Just "baz"
 -- > stripPrefix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- GHC, as follows:
 --
 -- > {-# LANGUAGE ViewPatterns #-}
--- > import Data.Text as T
+-- > import Data.Text.Lazy as T
 -- >
 -- > fnordLength :: Text -> Int
 -- > fnordLength (stripPrefix "fnord" -> Just suf) = T.length suf
--- > fnordLength _                              = -1
+-- > fnordLength _                                 = -1
 stripPrefix :: Text -> Text -> Maybe Text
--- Yes, this could be much more efficient.
 stripPrefix p t
-    | p `isPrefixOf` t = Just (drop (length p) t)
-    | otherwise        = Nothing
+    | null p    = Just t
+    | otherwise = case commonPrefixes p t of
+                    Just (_,c,r) | null c -> Just r
+                    _                     -> Nothing
 
--- | /O(n)/ Returns the prefix of the second string if its suffix
--- matches the first.
+-- | /O(n)/ Find the longest non-empty common prefix of two strings
+-- and return it, along with the suffixes of each string at which they
+-- no longer match.
+--
+-- If the strings do not have a common prefix or either one is empty,
+-- this function returns 'Nothing'.
+--
+-- Examples:
+--
+-- > commonPrefixes "foobar" "fooquux" == Just ("foo","bar","quux")
+-- > commonPrefixes "veeble" "fetzer"  == Nothing
+-- > commonPrefixes "" "baz"           == Nothing
+commonPrefixes :: Text -> Text -> Maybe (Text,Text,Text)
+commonPrefixes Empty _ = Nothing
+commonPrefixes _ Empty = Nothing
+commonPrefixes a0 b0   = Just (go a0 b0 [])
+  where
+    go t0@(Chunk x xs) t1@(Chunk y ys) ps
+        = case T.commonPrefixes x y of
+            Just (p,a,b)
+              | T.null a  -> go xs (chunk b ys) (p:ps)
+              | T.null b  -> go (chunk a xs) ys (p:ps)
+              | otherwise -> (fromChunks (L.reverse (p:ps)),chunk a xs, chunk b ys)
+            Nothing       -> (fromChunks (L.reverse ps),t0,t1)
+    go t0 t1 ps = (fromChunks (L.reverse ps),t0,t1)
+
+-- | /O(n)/ Return the prefix of the second string if its suffix
+-- matches the entire first string.
 --
 -- Examples:
 --
 -- > stripSuffix "bar" "foobar" == Just "foo"
+-- > stripSuffix ""    "baz"    == Just "baz"
 -- > stripSuffix "foo" "quux"   == Nothing
 --
 -- This is particularly useful with the @ViewPatterns@ extension to
 -- GHC, as follows:
 --
 -- > {-# LANGUAGE ViewPatterns #-}
--- > import Data.Text as T
+-- > import Data.Text.Lazy as T
 -- >
 -- > quuxLength :: Text -> Int
 -- > quuxLength (stripSuffix "quux" -> Just pre) = T.length pre
--- > quuxLength _                             = -1
+-- > quuxLength _                                = -1
 stripSuffix :: Text -> Text -> Maybe Text
--- Yes, this could be much more efficient.
-stripSuffix p t
-    | p `isSuffixOf` t = Just (take (length t - length p) t)
-    | otherwise        = Nothing
+stripSuffix p t = reverse `fmap` stripPrefix (reverse p) (reverse t)
 
 -- | /O(n)/ 'filter', applied to a predicate and a 'Text',
 -- returns a 'Text' containing those characters that satisfy the

tests/Properties.hs

 t_stripSuffix s      = (fmap packS . stripSuffix s) `eqP` T.stripSuffix (packS s)
 tl_stripSuffix s     = (fmap packS . stripSuffix s) `eqP` TL.stripSuffix (packS s)
 
+commonPrefixes a0@(_:_) b0@(_:_) = Just (go a0 b0 [])
+    where go (a:as) (b:bs) ps
+              | a == b = go as bs (a:ps)
+          go as bs ps  = (reverse ps,as,bs)
+commonPrefixes _ _ = Nothing
+
+t_commonPrefixes a b (NonEmpty p)
+    = commonPrefixes pa pb ==
+      repack `fmap` T.commonPrefixes (packS pa) (packS pb)
+  where repack (x,y,z) = (unpackS x,unpackS y,unpackS z)
+        pa = p ++ a
+        pb = p ++ b
+
+tl_commonPrefixes a b (NonEmpty p)
+    = commonPrefixes pa pb ==
+      repack `fmap` TL.commonPrefixes (packS pa) (packS pb)
+  where repack (x,y,z) = (unpackS x,unpackS y,unpackS z)
+        pa = p ++ a
+        pb = p ++ b
+
 sf_elem p c       = (L.elem c . L.filter p) `eqP` (S.elem c . S.filter p)
 sf_filter q p     = (L.filter p . L.filter q) `eqP`
                     (unpackS . S.filter p . S.filter q)
       testProperty "t_stripPrefix" t_stripPrefix,
       testProperty "tl_stripPrefix" tl_stripPrefix,
       testProperty "t_stripSuffix" t_stripSuffix,
-      testProperty "tl_stripSuffix" tl_stripSuffix
+      testProperty "tl_stripSuffix" tl_stripSuffix,
+      testProperty "t_commonPrefixes" t_commonPrefixes,
+      testProperty "tl_commonPrefixes" tl_commonPrefixes
     ]
   ],
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.