Bryan O'Sullivan committed 57bf190

Ensure that an encoding error handler's result is safe

Comments (0)

Files changed (2)


 import Data.ByteString as B
 import Data.ByteString.Internal as B
 import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
-import Data.Text.Internal (Text(..), textP)
+import Data.Text.Internal (Text(..), safe, textP)
 import Data.Text.Private (runText)
 import Data.Text.UnsafeChar (ord, unsafeWrite)
 import Data.Text.UnsafeShift (shiftL, shiftR)
                       Just c -> do
                         destOff <- peek destOffPtr
                         w <- unsafeSTToIO $
-                             unsafeWrite dest (fromIntegral destOff) c
+                             unsafeWrite dest (fromIntegral destOff) (safe c)
                         poke destOffPtr (destOff + fromIntegral w)
                         loop $ curPtr' `plusPtr` 1
           loop (ptr `plusPtr` off)


 import Control.Exception (SomeException, handle)
 import System.IO
-import Test.HUnit (assertFailure)
+import Test.HUnit (assertBool, assertFailure)
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as LB
 import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
 import qualified Data.Text.IO as T
 import qualified Data.Text.Lazy as LT
 import qualified Data.Text.Lazy.Encoding as LE
     power | maxBound == (2147483647::Int) = 28
           | otherwise                     = 60 :: Int
+-- Reported by John Millikin: a UTF-8 decode error handler could
+-- return a bogus substitution character, which we would write without
+-- checking.
+utf8_decode_unsafe :: IO ()
+utf8_decode_unsafe = do
+  let t = TE.decodeUtf8With (\_ _ -> Just '\xdc00') "\x80"
+  assertBool "broken error recovery shouldn't break us" (t == "\xfffd")
 tests :: F.Test
 tests = F.testGroup "Regressions"
     [ F.testCase "hGetContents_crash" hGetContents_crash
     , F.testCase "lazy_encode_crash" lazy_encode_crash
     , F.testCase "replicate_crash" replicate_crash
+    , F.testCase "utf8_decode_unsafe" utf8_decode_unsafe
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.