Commits

Anonymous committed cad006c

Add `Data.Text.Lazy.Encoding.decodeLatin1` ISO-8859-1 decoding function

See 7c06306bd5b7382cb101f8632b5a1fc50697fe94 for more information

Comments (0)

Files changed (2)

Data/Text/Lazy/Encoding.hs

     -- * Decoding ByteStrings to Text
     -- $strict
       decodeASCII
+    , decodeLatin1
     , decodeUtf8
     , decodeUtf16LE
     , decodeUtf16BE
 -- | /Deprecated/.  Decode a 'ByteString' containing 7-bit ASCII
 -- encoded text.
 --
--- This function is deprecated.  Use 'decodeUtf8' instead.
+-- This function is deprecated.  Use 'decodeLatin1' instead.
 decodeASCII :: B.ByteString -> Text
 decodeASCII = decodeUtf8
 -- NOTE(review): the updated haddock above points readers to
 -- 'decodeLatin1', but this definition and the DEPRECATED message below
 -- still name decodeUtf8 -- confirm which replacement is intended.
 {-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
 
+-- | Decode a chunked 'ByteString' holding Latin-1 (aka ISO-8859-1) text.
+-- Each chunk from 'B.toChunks' is decoded with 'TE.decodeLatin1' and the
+-- pieces are re-linked, in order, with 'chunk'.
+decodeLatin1 :: B.ByteString -> Text
+decodeLatin1 bs = foldr prepend empty (B.toChunks bs)
+  where prepend piece rest = chunk (TE.decodeLatin1 piece) rest
+
 -- | Decode a 'ByteString' containing UTF-8 encoded text.
 decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
 decodeUtf8With onErr bs0 = fast bs0

tests/Tests/Properties.hs

 import Test.Framework.Providers.QuickCheck2 (testProperty)
 import qualified Data.Bits as Bits (shiftL, shiftR)
 import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
 import qualified Data.List as L
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as E
 -- Caveat: 'B.pack' takes 8-bit values, so code points above 255 are
 -- silently truncated to 8 bits.
 encodeL1 :: T.Text -> B.ByteString
 encodeL1 txt = B.pack [fromIntegral (fromEnum ch) | ch <- T.unpack txt]
+-- Lazy counterpart of 'encodeL1': each strict chunk is encoded on its own
+-- and the results are gathered into a lazy 'BL.ByteString'.
+encodeLazyL1 :: TL.Text -> BL.ByteString
+encodeLazyL1 lazyTxt = BL.fromChunks [encodeL1 piece | piece <- TL.toChunks lazyTxt]
 
 -- ASCII round-trip for strict text: every character is first folded into
 -- the 7-bit range, then encoded as UTF-8 and decoded with 'E.decodeASCII'.
 t_ascii t    = E.decodeASCII (E.encodeUtf8 a) == a
     where a  = T.map (\c -> chr (ord c `mod` 128)) t
 -- Same property for lazy text.  The equation head was missing, which left
 -- the 'where' clause below dangling; restored to mirror 't_ascii' using the
 -- lazy modules ('EL', 'TL') -- TODO confirm against the original file.
 tl_ascii t   = EL.decodeASCII (EL.encodeUtf8 a) == a
     where a  = TL.map (\c -> chr (ord c `mod` 128)) t
 -- Latin-1 round-trip for strict text: clamp every character to 0..255,
 -- encode with 'encodeL1', and expect 'E.decodeLatin1' to give it back.
 t_latin1 t   = E.decodeLatin1 (encodeL1 clamped) == clamped
     where clamped = T.map (chr . (`mod` 256) . ord) t
+-- Latin-1 round-trip for lazy text: clamp every character to 0..255,
+-- encode with 'encodeLazyL1', and expect 'EL.decodeLatin1' to give it back.
+tl_latin1 t  = EL.decodeLatin1 (encodeLazyL1 clamped) == clamped
+    where clamped = TL.map (chr . (`mod` 256) . ord) t
 -- UTF-8 encode/decode round-trips over inputs drawn from 'genUnicode'.
 -- NOTE(review): 'eq' is defined outside this excerpt; presumably it
 -- compares two functions on the same input -- confirm.
 t_utf8       = forAll genUnicode ((E.decodeUtf8 . E.encodeUtf8) `eq` id)
 t_utf8'      = forAll genUnicode ((E.decodeUtf8' . E.encodeUtf8) `eq` Right)
 tl_utf8      = forAll genUnicode ((EL.decodeUtf8 . EL.encodeUtf8) `eq` id)
       testProperty "t_ascii" t_ascii,
       testProperty "tl_ascii" tl_ascii,
       testProperty "t_latin1" t_latin1,
+      testProperty "tl_latin1" tl_latin1,
       testProperty "t_utf8" t_utf8,
       testProperty "t_utf8'" t_utf8',
       testProperty "tl_utf8" tl_utf8,