Bryan O'Sullivan avatar Bryan O'Sullivan committed dedeed7

Add decodeUtf8' for lazy Text.

Comments (0)

Files changed (1)

Data/Text/Lazy/Encoding.hs

     , decodeUtf16BE
     , decodeUtf32LE
     , decodeUtf32BE
+
+    -- ** Catchable failure
+    , decodeUtf8'
+
     -- ** Controllable error handling
     , decodeUtf8With
     , decodeUtf16LEWith
     , encodeUtf32BE
     ) where
 
+import Control.Exception (evaluate, try)
 import Data.Bits ((.&.))
-import Data.Text.Encoding.Error (OnDecodeError, strictDecode)
+import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
 import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldrChunks)
+import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.ByteString as S
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Internal as B
     desc = "Data.Text.Lazy.Encoding.decodeUtf8With: Invalid UTF-8 stream"
 {-# INLINE[0] decodeUtf8With #-}
 
--- | Decode a 'ByteString' containing UTF-8 encoded text.
+-- | Decode a 'ByteString' containing UTF-8 encoded text that is known
+-- to be valid.
 --
 -- If the input contains any invalid UTF-8 data, an exception will be
--- thrown.  For more control over the handling of invalid data, use
+-- thrown that cannot be caught in pure code.  For more control over
+-- the handling of invalid data, use 'decodeUtf8'' or
 -- 'decodeUtf8With'.
 decodeUtf8 :: B.ByteString -> Text
 decodeUtf8 = decodeUtf8With strictDecode
 {- RULES "LAZY STREAM stream/decodeUtf8' fusion" [1]
    forall bs. F.stream (decodeUtf8' bs) = E.streamUtf8 strictDecode bs #-}
 
+-- | Decode a 'ByteString' containing UTF-8 encoded text..
+--
+-- If the input contains any invalid UTF-8 data, the relevant
+-- exception will be returned, otherwise the decoded text.
+--
+-- /Note/: this function is /not/ lazy, as it must decode its entire
+-- input before it can return a result.  If you need lazy (streaming)
+-- decoding, use 'decodeUtf8With' in lenient mode.
+decodeUtf8' :: B.ByteString -> Either UnicodeException Text
+decodeUtf8' bs = unsafePerformIO $ do
+                   let t = decodeUtf8 bs
+                   try (evaluate (rnf t `seq` t))
+  where
+    rnf Empty        = ()
+    rnf (Chunk _ ts) = rnf ts
+{-# INLINE decodeUtf8' #-}
+
 encodeUtf8 :: Text -> B.ByteString
 encodeUtf8 (Chunk c cs) = B.Chunk (TE.encodeUtf8 c) (encodeUtf8 cs)
 encodeUtf8 Empty        = B.Empty
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.