Commits

Bryan O'Sullivan committed 220a579

Add portable line splitter.

Comments (0)

Files changed (2)

 
     -- ** Breaking into lines and words
     , lines
+    , lines'
     , words
     , unlines
     , unwords
     ) where
 
 import Prelude (Char, Bool(..), Functor(..), Int, Maybe(..), String,
-                Eq, (==), (++),
+                Eq(..), (++),
                 Read(..), Show(..),
                 (&&), (||), (+), (-), (<), (>), (<=), (>=), (.), ($),
                 not, return, otherwise)
 
 -- | /O(n)/ Breaks a 'Text' up into a list of 'Text's at
 -- newline 'Char's. The resulting strings do not contain newlines.
+lines :: Text -> [Text]
+lines txt
+    | null txt  = []
+    | otherwise = firstLine : others
+  where
+    -- 'firstLine' is everything before the first newline; 'afterLine'
+    -- is the remainder, which (when non-empty) begins with that newline.
+    (firstLine, afterLine) = span (/= '\n') txt
+    -- Skip the newline itself before splitting the rest.  A newline
+    -- that ends the input does not produce an extra empty line, since
+    -- the recursive call then sees an empty 'Text'.
+    others | null afterLine = []
+           | otherwise      = lines (unsafeTail afterLine)
+{-# INLINE lines #-}
+
+-- | /O(n)/ Portably breaks a 'Text' up into a list of 'Text's at line
+-- boundaries.
 --
-lines :: Text -> [Text]
-lines ps
-    | null ps = []
-    | otherwise = case search ps of
-             Nothing -> [ps]
-             Just n  -> take n ps : lines (drop (n+1) ps)
-    where search = elemIndex '\n'
-{-# INLINE lines #-}
+-- A line boundary is considered to be either a line feed, a carriage
+-- return immediately followed by a line feed, or a carriage return.
+-- This accounts for both Unix and Windows line ending conventions,
+-- and for the old convention used on Mac OS 9 and earlier.
+lines' :: Text -> [Text]
+lines' ps | null ps   = []
+          | otherwise = h : case uncons t of
+                              Nothing -> []
+                              Just (c,t')
+                                  -- A carriage return may be the first half
+                                  -- of a CRLF pair; if so, consume both
+                                  -- characters as a single line boundary.
+                                  | c == '\r' -> case uncons t' of
+                                                   Just ('\n',t'') -> lines' t''
+                                                   _               -> lines' t'
+                                  -- 'span notEOL' only stops at '\n' or
+                                  -- '\r', so the remaining case is a lone
+                                  -- line feed.
+                                  | otherwise -> lines' t'
+    -- The recursion must go through lines' (not 'lines'), otherwise only
+    -- the first boundary is handled portably and later carriage returns
+    -- are left embedded in the results.
+    where (h,t) = span notEOL ps
+          notEOL c = c /= '\n' && c /= '\r'
+{-# INLINE lines' #-}
 
 -- | /O(n)/ Joins lines, after appending a terminating newline to
 -- each.

tests/Properties.hs

                        (unpack2 . T.breakSubstring (T.singleton c))
 
 prop_lines           = L.lines       `eqP` (map unpack . T.lines)
+prop_lines'          = portableLines `eqP` (map unpack . T.lines')
+    where
+      -- Reference model on 'String': split at "\r\n", "\r" or "\n",
+      -- dropping the terminator.  A terminator at the very end of the
+      -- input does not produce a trailing empty line.
+      portableLines []  = []
+      portableLines str = current : following
+        where (current, remainder) = break isEOL str
+              following = case remainder of
+                            '\r':'\n':more -> portableLines more
+                            _:more         -> portableLines more
+                            []             -> []
+      isEOL c = c == '\n' || c == '\r'
 prop_words           = L.words       `eqP` (map unpack . T.words)
 prop_unlines         = L.unlines     `eq`  (unpack . T.unlines . map pack)
 prop_unwords         = L.unwords     `eq`  (unpack . T.unwords . map pack)
   ("prop_breakSubstring_isInfixOf", mytest prop_breakSubstring_isInfixOf),
 
   ("prop_lines", mytest prop_lines),
+  ("prop_lines'", mytest prop_lines'),
   ("prop_words", mytest prop_words),
   ("prop_unlines", mytest prop_unlines),
   ("prop_unwords", mytest prop_unwords),