Bryan O'Sullivan avatar Bryan O'Sullivan committed e8797ca

Implement and document the new replacement behaviour.

Comments (0)

Files changed (3)

 -- Stability   : experimental
 -- Portability : GHC
 --
--- A time and space-efficient implementation of Unicode text using
--- packed Word16 arrays.  Suitable for performance critical use, both
--- in terms of large data quantities and high speed.
+-- A time and space-efficient implementation of Unicode text.
+-- Suitable for performance critical use, both in terms of large data
+-- quantities and high speed.
+--
+-- /Note/: Read below the synopsis for important notes on the use of
+-- this module.
 --
 -- This module is intended to be imported @qualified@, to avoid name
 -- clashes with "Prelude" functions, e.g.
 --
 -- > import qualified Data.Text as T
+--
+-- To use an extended and very rich family of functions for working
+-- with Unicode text (including normalization, regular expressions,
+-- non-standard encodings, text breaking, and locales), see the
+-- @text-icu@ package: <http://hackage.haskell.org/package/text-icu>
 
 module Data.Text
     (
     -- * Strict vs lazy types
     -- $strict
 
+    -- * Acceptable data
+    -- $replacement
+
     -- * Fusion
     -- $fusion
 
 import qualified Data.Text.Fusion as S
 import qualified Data.Text.Fusion.Common as S
 import Data.Text.Fusion (stream, reverseStream, unstream)
-import Data.Text.Internal (Text(..), empty, text, textP)
+import Data.Text.Internal (Text(..), empty, firstf, safe, text, textP)
 import qualified Prelude as P
 import Data.Text.Unsafe (Iter(..), iter, iter_, lengthWord16, reverseIter,
                          unsafeHead, unsafeTail)
 -- difference being that the strict module uses 'Int' values for
 -- lengths and counts, while the lazy module uses 'Int64' lengths.
 
+-- $replacement
+--
+-- A 'Text' value is a sequence of Unicode scalar values, as defined
+-- in &#xa7;3.9, definition D76 of the Unicode 5.2 standard:
+-- <http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#page=35>. As
+-- such, a 'Text' cannot contain values in the range U+D800 to U+DFFF
+-- inclusive. Haskell implementations admit all Unicode code points
+-- (&#xa7;3.4, definition D10) as 'Char' values, including code points
+-- from this invalid range.  This means that there are some 'Char'
+-- values that are not valid Unicode scalar values, and the functions
+-- in this module must handle those cases.
+--
+-- Within this module, many functions construct a 'Text' from one or
+-- more 'Char' values. Those functions will substitute 'Char' values
+-- that are not valid Unicode scalar values with the replacement
+-- character \"&#xfffd;\" (U+FFFD).  Functions that perform this
+-- inspection and replacement are documented with the phrase
+-- \"Performs replacement on invalid scalar values\".
+--
+-- (One reason for this policy of replacement is that internally, a
+-- 'Text' value is represented as packed UTF-16 data. Values in the
+-- range U+D800 through U+DFFF are used by UTF-16 to denote surrogate
+-- code points, and so cannot be represented. The functions replace
+-- invalid scalar values, instead of dropping them, as a security
+-- measure. For details, see Unicode Technical Report 36, &#xa7;3.5:
+-- <http://unicode.org/reports/tr36/#Deletion_of_Noncharacters>)
+
 -- $fusion
 --
 -- Most of the functions in this module are subject to /fusion/,
 -- -----------------------------------------------------------------------------
 -- * Conversion to/from 'Text'
 
--- | /O(n)/ Convert a 'String' into a 'Text'.  Subject to fusion.
+-- | /O(n)/ Convert a 'String' into a 'Text'.  Subject to
+-- fusion.  Performs replacement on invalid scalar values.
 pack :: String -> Text
-pack = unstream . S.streamList
+pack = unstream . S.streamList . L.map safe
 {-# INLINE [1] pack #-}
 
 -- | /O(n)/ Convert a Text into a String.  Subject to fusion.
 unpack = S.unstreamList . stream
 {-# INLINE [1] unpack #-}
 
--- | /O(1)/ Convert a character into a Text.
--- Subject to fusion.
+-- | /O(1)/ Convert a character into a Text.  Subject to fusion.
+-- Performs replacement on invalid scalar values.
 singleton :: Char -> Text
-singleton = unstream . S.singleton
+singleton = unstream . S.singleton . safe
 {-# INLINE [1] singleton #-}
 
 -- -----------------------------------------------------------------------------
 
 -- | /O(n)/ Adds a character to the front of a 'Text'.  This function
 -- is more costly than its 'List' counterpart because it requires
--- copying a new array.  Subject to fusion.
+-- copying a new array.  Subject to fusion.  Performs replacement on
+-- invalid scalar values.
 cons :: Char -> Text -> Text
-cons c t = unstream (S.cons c (stream t))
+cons c t = unstream (S.cons (safe c) (stream t))
 {-# INLINE cons #-}
 
 -- | /O(n)/ Adds a character to the end of a 'Text'.  This copies the
 -- entire array in the process, unless fused.  Subject to fusion.
+-- Performs replacement on invalid scalar values.
 snoc :: Text -> Char -> Text
-snoc t c = unstream (S.snoc (stream t) c)
+snoc t c = unstream (S.snoc (stream t) (safe c))
 {-# INLINE snoc #-}
 
 -- | /O(n)/ Appends one 'Text' to the other by copying both of them
 -- -----------------------------------------------------------------------------
 -- * Transformations
 -- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to
--- each element of @t@.  Subject to fusion.
+-- each element of @t@.  Subject to fusion.  Performs replacement on
+-- invalid scalar values.
 map :: (Char -> Char) -> Text -> Text
-map f t = unstream (S.map f (stream t))
+map f t = unstream (S.map (safe . f) (stream t))
 {-# INLINE [1] map #-}
 
 -- | /O(n)/ The 'intercalate' function takes a 'Text' and a list of
 {-# INLINE intercalate #-}
 
 -- | /O(n)/ The 'intersperse' function takes a character and places it
--- between the characters of a 'Text'.  Subject to fusion.
+-- between the characters of a 'Text'.  Subject to fusion.  Performs
+-- replacement on invalid scalar values.
 intersperse     :: Char -> Text -> Text
-intersperse c t = unstream (S.intersperse c (stream t))
+intersperse c t = unstream (S.intersperse (safe c) (stream t))
 {-# INLINE intersperse #-}
 
 -- | /O(n)/ Reverse the characters of a string. Subject to fusion.
 {-# INLINE toUpper #-}
 
 -- | /O(n)/ Left-justify a string to the given length, using the
--- specified fill character on the right. Subject to fusion. Examples:
+-- specified fill character on the right. Subject to fusion.
+-- Performs replacement on invalid scalar values.
+--
+-- Examples:
 --
 -- > justifyLeft 7 'x' "foo"    == "fooxxxx"
 -- > justifyLeft 3 'x' "foobar" == "foobar"
   #-}
 
 -- | /O(n)/ Right-justify a string to the given length, using the
--- specified fill character on the left. Examples:
+-- specified fill character on the left.  Performs replacement on
+-- invalid scalar values.
+--
+-- Examples:
 --
 -- > justifyRight 7 'x' "bar"    == "xxxxbar"
 -- > justifyRight 3 'x' "foobar" == "foobar"
   where len = length t
 {-# INLINE justifyRight #-}
 
--- | /O(n)/ Center a string to the given length, using the
--- specified fill character on either side. Examples:
+-- | /O(n)/ Center a string to the given length, using the specified
+-- fill character on either side.  Performs replacement on invalid
+-- scalar values.
+--
+-- Examples:
 --
 -- > center 8 'x' "HS" = "xxxHSxxx"
 center :: Int -> Char -> Text -> Text
 
 -- | /O(n)/ 'scanl' is similar to 'foldl', but returns a list of
 -- successive reduced values from the left. Subject to fusion.
+-- Performs replacement on invalid scalar values.
 --
 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 --
 --
 -- > last (scanl f z xs) == foldl f z xs.
 scanl :: (Char -> Char -> Char) -> Char -> Text -> Text
-scanl f z t = unstream (S.scanl f z (stream t))
+scanl f z t = unstream (S.scanl g z (stream t))
+    where g a b = safe (f a b)
 {-# INLINE scanl #-}
 
 -- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting
--- value argument.  Subject to fusion.
+-- value argument.  Subject to fusion.  Performs replacement on
+-- invalid scalar values.
 --
 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 scanl1 :: (Char -> Char -> Char) -> Text -> Text
            | otherwise = scanl f (unsafeHead t) (unsafeTail t)
 {-# INLINE scanl1 #-}
 
--- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'.
+-- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'.  Performs
+-- replacement on invalid scalar values.
 --
 -- > scanr f v == reverse . scanl (flip f) v . reverse
 scanr :: (Char -> Char -> Char) -> Char -> Text -> Text
-scanr f z = S.reverse . S.reverseScanr f z . reverseStream
+scanr f z = S.reverse . S.reverseScanr g z . reverseStream
+    where g a b = safe (f a b)
 {-# INLINE scanr #-}
 
 -- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting
--- value argument.  Subject to fusion.
+-- value argument.  Subject to fusion.  Performs replacement on
+-- invalid scalar values.
 scanr1 :: (Char -> Char -> Char) -> Text -> Text
 scanr1 f t | null t    = empty
            | otherwise = scanr f (last t) (init t)
 
 -- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
 -- function to each element of a 'Text', passing an accumulating
--- parameter from left to right, and returns a final 'Text'.
+-- parameter from left to right, and returns a final 'Text'.  Performs
+-- replacement on invalid scalar values.
 mapAccumL :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
-mapAccumL f z0 = S.mapAccumL f z0 . stream
+mapAccumL f z0 = S.mapAccumL g z0 . stream
+    where g a b = second safe (f a b)
 {-# INLINE mapAccumL #-}
 
 -- | The 'mapAccumR' function behaves like a combination of 'map' and
 -- 'Text', passing an accumulating parameter from right to left, and
 -- returning a final value of this accumulator together with the new
 -- 'Text'.
+-- Performs replacement on invalid scalar values.
 mapAccumR :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
-mapAccumR f z0 = second reverse . S.mapAccumL f z0 . reverseStream
+mapAccumR f z0 = second reverse . S.mapAccumL g z0 . reverseStream
+    where g a b = second safe (f a b)
 {-# INLINE mapAccumR #-}
 
 -- -----------------------------------------------------------------------------
 -- | /O(n)/ 'replicateChar' @n@ @c@ is a 'Text' of length @n@ with @c@ the
 -- value of every element. Subject to fusion.
 replicateChar :: Int -> Char -> Text
-replicateChar n c = unstream (S.replicateCharI n c)
+replicateChar n c = unstream (S.replicateCharI n (safe c))
 {-# INLINE replicateChar #-}
 
 -- | /O(n)/, where @n@ is the length of the result. The 'unfoldr'
 -- returns 'Nothing' if it is done producing the 'Text', otherwise
 -- 'Just' @(a,b)@.  In this case, @a@ is the next 'Char' in the
 -- string, and @b@ is the seed value for further production. Subject
--- to fusion.
+-- to fusion.  Performs replacement on invalid scalar values.
 unfoldr     :: (a -> Maybe (Char,a)) -> a -> Text
-unfoldr f s = unstream (S.unfoldr f s)
+unfoldr f s = unstream (S.unfoldr (firstf safe . f) s)
 {-# INLINE unfoldr #-}
 
 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Text' from a seed
 -- first argument to 'unfoldrN'. This function is more efficient than
 -- 'unfoldr' when the maximum length of the result is known and
 -- correct, otherwise its performance is similar to 'unfoldr'. Subject
--- to fusion.
+-- to fusion.  Performs replacement on invalid scalar values.
 unfoldrN     :: Int -> (a -> Maybe (Char,a)) -> a -> Text
-unfoldrN n f s = unstream (S.unfoldrN n f s)
+unfoldrN n f s = unstream (S.unfoldrN n (firstf safe . f) s)
 {-# INLINE unfoldrN #-}
 
 -- -----------------------------------------------------------------------------
 
 -- | /O(n)/ 'zipWith' generalises 'zip' by zipping with the function
 -- given as the first argument, instead of a tupling function.
+-- Performs replacement on invalid scalar values.
 zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text
-zipWith f t1 t2 = unstream (S.zipWith f (stream t1) (stream t2))
+zipWith f t1 t2 = unstream (S.zipWith g (stream t1) (stream t2))
+    where g a b = safe (f a b)
 {-# INLINE [0] zipWith #-}
 
 -- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's

Data/Text/Internal.hs

     -- * Construction
     , text
     , textP
+    -- * Safety
+    , safe
     -- * Code that must be here for accessibility
     , empty
+    -- * Utilities
+    , firstf
     -- * Debugging
     , showText
     ) where
 #if defined(ASSERTS)
 import Control.Exception (assert)
 #endif
+import Data.Bits ((.&.))
 import qualified Data.Text.Array as A
+import Data.Text.UnsafeChar (ord)
 import Data.Typeable (Typeable)
 
 -- | A space efficient, packed, unboxed Unicode text type.
 showText (Text arr off len) =
     "Text " ++ show (A.toList arr off len) ++ ' ' :
             show off ++ ' ' : show len
+
+-- | Map a 'Char' to a 'Text'-safe value.
+--
+-- UTF-16 surrogate code points are not included in the set of Unicode
+-- scalar values, but are unfortunately admitted as valid 'Char'
+-- values by Haskell.  They cannot be represented in a 'Text'.  This
+-- function remaps those code points to the Unicode replacement
+-- character \"&#xfffd;\", and leaves other code points unchanged.
+safe :: Char -> Char
+safe c
+    | ord c .&. 0x1ff800 /= 0xd800 = c
+    | otherwise                    = '\xfffd'
+{-# INLINE safe #-}
+
+-- | Apply a function to the first element of an optional pair.
+firstf :: (a -> c) -> Maybe (a,b) -> Maybe (c,b)
+firstf f (Just (a, b)) = Just (f a, b)
+firstf _  Nothing      = Nothing

Data/Text/Lazy.hs

 -- Portability : GHC
 --
 -- A time and space-efficient implementation of Unicode text using
--- lists of packed arrays.  This representation is suitable for high
+-- lists of packed arrays.
+--
+-- /Note/: Read below the synopsis for important notes on the use of
+-- this module.
+--
+-- The representation used by this module is suitable for high
 -- performance use and for streaming large quantities of data.  It
 -- provides a means to manipulate a large body of text without
 -- requiring that the entire content be resident in memory.
 
 module Data.Text.Lazy
     (
+    -- * Fusion
+    -- $fusion
+
+    -- * Acceptable data
+    -- $replacement
+
+    -- * Types
       Text
+
     -- * Creation and elimination
     , pack
     , unpack
 import Data.Text.Fusion.Internal (PairS(..))
 import Data.Text.Lazy.Fusion (stream, unstream)
 import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldlChunks, foldrChunks)
-import Data.Text.Internal (textP)
+import Data.Text.Internal (firstf, safe, textP)
 import qualified Data.Text.Util as U
 import Data.Text.Lazy.Search (indices)
 
+-- $fusion
+--
+-- Most of the functions in this module are subject to /fusion/,
+-- meaning that a pipeline of such functions will usually allocate at
+-- most one 'Text' value.
+--
+-- As an example, consider the following pipeline:
+--
+-- > import Data.Text.Lazy as T
+-- > import Data.Text.Lazy.Encoding as E
+-- > import Data.ByteString.Lazy (ByteString)
+-- >
+-- > countChars :: ByteString -> Int
+-- > countChars = T.length . T.toUpper . E.decodeUtf8
+--
+-- From the type signatures involved, this looks like it should
+-- allocate one 'ByteString' value, and two 'Text' values. However,
+-- when a module is compiled with optimisation enabled under GHC, the
+-- two intermediate 'Text' values will be optimised away, and the
+-- function will be compiled down to a single loop over the source
+-- 'ByteString'.
+--
+-- Functions that can be fused by the compiler are documented with the
+-- phrase \"Subject to fusion\".
+
+-- $replacement
+--
+-- A 'Text' value is a sequence of Unicode scalar values, as defined
+-- in &#xa7;3.9, definition D76 of the Unicode 5.2 standard:
+-- <http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#page=35>. As
+-- such, a 'Text' cannot contain values in the range U+D800 to U+DFFF
+-- inclusive. Haskell implementations admit all Unicode code points
+-- (&#xa7;3.4, definition D10) as 'Char' values, including code points
+-- from this invalid range.  This means that there are some 'Char'
+-- values that are not valid Unicode scalar values, and the functions
+-- in this module must handle those cases.
+--
+-- Within this module, many functions construct a 'Text' from one or
+-- more 'Char' values. Those functions will substitute 'Char' values
+-- that are not valid Unicode scalar values with the replacement
+-- character \"&#xfffd;\" (U+FFFD).  Functions that perform this
+-- inspection and replacement are documented with the phrase
+-- \"Performs replacement on invalid scalar values\".
+--
+-- (One reason for this policy of replacement is that internally, a
+-- 'Text' value is represented as packed UTF-16 data. Values in the
+-- range U+D800 through U+DFFF are used by UTF-16 to denote surrogate
+-- code points, and so cannot be represented. The functions replace
+-- invalid scalar values, instead of dropping them, as a security
+-- measure. For details, see Unicode Technical Report 36, &#xa7;3.5:
+-- <http://unicode.org/reports/tr36#Deletion_of_Noncharacters>)
+
 equal :: Text -> Text -> Bool
 equal Empty Empty = True
 equal Empty _     = False
 
 -- | /O(n)/ Convert a 'String' into a 'Text'.
 --
--- This function is subject to array fusion.
+-- Subject to fusion.  Performs replacement on invalid scalar values.
 pack :: String -> Text
-pack s = unstream (S.streamList s)
+pack = unstream . S.streamList . L.map safe
 {-# INLINE [1] pack #-}
 
 -- | /O(n)/ Convert a 'Text' into a 'String'.
 unpack t = S.unstreamList (stream t)
 {-# INLINE [1] unpack #-}
 
--- | /O(1)/ Convert a character into a Text.
--- Subject to fusion.
+-- | /O(1)/ Convert a character into a Text.  Subject to fusion.
+-- Performs replacement on invalid scalar values.
 singleton :: Char -> Text
 singleton c = Chunk (T.singleton c) Empty
 {-# INLINE [1] singleton #-}
 -- properties of code.
 
 -- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to
--- each element of @t@.  Subject to array fusion.
+-- each element of @t@.  Subject to fusion.  Performs replacement on
+-- invalid scalar values.
 map :: (Char -> Char) -> Text -> Text
-map f t = unstream (S.map f (stream t))
+map f t = unstream (S.map (safe . f) (stream t))
 {-# INLINE [1] map #-}
 
 -- | /O(n)/ The 'intercalate' function takes a 'Text' and a list of
 {-# INLINE intersperse #-}
 
 -- | /O(n)/ Left-justify a string to the given length, using the
--- specified fill character on the right. Subject to fusion. Examples:
+-- specified fill character on the right. Subject to fusion.  Performs
+-- replacement on invalid scalar values.
+--
+-- Examples:
 --
 -- > justifyLeft 7 'x' "foo"    == "fooxxxx"
 -- > justifyLeft 3 'x' "foobar" == "foobar"
   #-}
 
 -- | /O(n)/ Right-justify a string to the given length, using the
--- specified fill character on the left. Examples:
+-- specified fill character on the left.  Performs replacement on
+-- invalid scalar values.
+--
+-- Examples:
 --
 -- > justifyRight 7 'x' "bar"    == "xxxxbar"
 -- > justifyRight 3 'x' "foobar" == "foobar"
   where len = length t
 {-# INLINE justifyRight #-}
 
--- | /O(n)/ Center a string to the given length, using the
--- specified fill character on either side. Examples:
+-- | /O(n)/ Center a string to the given length, using the specified
+-- fill character on either side.  Performs replacement on invalid
+-- scalar values.
+--
+-- Examples:
 --
 -- > center 8 'x' "HS" = "xxxHSxxx"
 center :: Int64 -> Char -> Text -> Text
 {-# INLINE minimum #-}
 
 -- | /O(n)/ 'scanl' is similar to 'foldl', but returns a list of
--- successive reduced values from the left. This function is subject
--- to array fusion.
+-- successive reduced values from the left. Subject to fusion.
+-- Performs replacement on invalid scalar values.
 --
 -- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
 --
 --
 -- > last (scanl f z xs) == foldl f z xs.
 scanl :: (Char -> Char -> Char) -> Char -> Text -> Text
-scanl f z t = unstream (S.scanl f z (stream t))
+scanl f z t = unstream (S.scanl g z (stream t))
+    where g a b = safe (f a b)
 {-# INLINE scanl #-}
 
 -- | /O(n)/ 'scanl1' is a variant of 'scanl' that has no starting
--- value argument.  This function is subject to array fusion.
+-- value argument.  Subject to fusion.  Performs replacement on
+-- invalid scalar values.
 --
 -- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
 scanl1 :: (Char -> Char -> Char) -> Text -> Text
                 Just (t,ts) -> scanl f t ts
 {-# INLINE scanl1 #-}
 
--- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'.
+-- | /O(n)/ 'scanr' is the right-to-left dual of 'scanl'.  Performs
+-- replacement on invalid scalar values.
 --
 -- > scanr f v == reverse . scanl (flip f) v . reverse
 scanr :: (Char -> Char -> Char) -> Char -> Text -> Text
-scanr f v = reverse . scanl (flip f) v . reverse
+scanr f v = reverse . scanl g v . reverse
+    where g a b = safe (f b a)
 
 -- | /O(n)/ 'scanr1' is a variant of 'scanr' that has no starting
--- value argument.
+-- value argument.  Performs replacement on invalid scalar values.
 scanr1 :: (Char -> Char -> Char) -> Text -> Text
 scanr1 f t | null t    = empty
            | otherwise = scanr f (last t) (init t)
 
 -- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
 -- function to each element of a 'Text', passing an accumulating
--- parameter from left to right, and returns a final 'Text'.
+-- parameter from left to right, and returns a final 'Text'.  Performs
+-- replacement on invalid scalar values.
 mapAccumL :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
 mapAccumL f = go
   where
 -- a strict 'foldr'; it applies a function to each element of a
 -- 'Text', passing an accumulating parameter from right to left, and
 -- returning a final value of this accumulator together with the new
--- 'Text'.
+-- 'Text'.  Performs replacement on invalid scalar values.
 mapAccumR :: (a -> Char -> (a,Char)) -> a -> Text -> (a, Text)
 mapAccumR f = go
   where
 -- | /O(n)/ 'replicateChar' @n@ @c@ is a 'Text' of length @n@ with @c@ the
 -- value of every element. Subject to fusion.
 replicateChar :: Int64 -> Char -> Text
-replicateChar n c = unstream (S.replicateCharI n c)
+replicateChar n c = unstream (S.replicateCharI n (safe c))
 {-# INLINE replicateChar #-}
 
 {-# RULES
 -- 'Text' from a seed value. The function takes the element and
 -- returns 'Nothing' if it is done producing the 'Text', otherwise
 -- 'Just' @(a,b)@.  In this case, @a@ is the next 'Char' in the
--- string, and @b@ is the seed value for further production.
-unfoldr     :: (a -> Maybe (Char,a)) -> a -> Text
-unfoldr f s = unstream (S.unfoldr f s)
+-- string, and @b@ is the seed value for further production.  Performs
+-- replacement on invalid scalar values.
+unfoldr :: (a -> Maybe (Char,a)) -> a -> Text
+unfoldr f s = unstream (S.unfoldr (firstf safe . f) s)
 {-# INLINE unfoldr #-}
 
 -- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Text' from a seed
 -- first argument to 'unfoldrN'. This function is more efficient than
 -- 'unfoldr' when the maximum length of the result is known and
 -- correct, otherwise its performance is similar to 'unfoldr'.
-unfoldrN     :: Int64 -> (a -> Maybe (Char,a)) -> a -> Text
-unfoldrN n f s = unstream (S.unfoldrN n f s)
+-- Performs replacement on invalid scalar values.
+unfoldrN :: Int64 -> (a -> Maybe (Char,a)) -> a -> Text
+unfoldrN n f s = unstream (S.unfoldrN n (firstf safe . f) s)
 {-# INLINE unfoldrN #-}
 
 -- | /O(n)/ 'take' @n@, applied to a 'Text', returns the prefix of the
 
 -- | /O(n)/ 'zipWith' generalises 'zip' by zipping with the function
 -- given as the first argument, instead of a tupling function.
+-- Performs replacement on invalid scalar values.
 zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text
-zipWith f t1 t2 = unstream (S.zipWith f (stream t1) (stream t2))
+zipWith f t1 t2 = unstream (S.zipWith g (stream t1) (stream t2))
+    where g a b = safe (f a b)
 {-# INLINE [0] zipWith #-}
 
 revChunks :: [T.Text] -> Text
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.