Bryan O'Sullivan avatar Bryan O'Sullivan committed de7a10e Merge

Merge

Comments (0)

Files changed (30)

 6a3d847a56a69d0514a79cb212cb218271ad0917 0.11.1.13
 1d2c6fa9092c6a4000b2abdd9d01f3efcd477be5 0.11.2.0
 78219784cf3652cc662805bf2971bd62d80210a9 0.11.2.1
+4297307ebc11ad677cfba6b40319e7e5e2c0cfee 0.11.2.3
 {-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- |
 -- Module      : Data.Text
 -- | /O(n)/ Convert a 'String' into a 'Text'.  Subject to
 -- fusion.  Performs replacement on invalid scalar values.
 pack :: String -> Text
-pack = unstream . S.streamList . L.map safe
+pack = unstream . S.map safe . S.streamList
 {-# INLINE [1] pack #-}
 
 -- | /O(n)/ Convert a Text into a String.  Subject to fusion.
 unpack = S.unstreamList . stream
 {-# INLINE [1] unpack #-}
 
--- | /O(n)/ Convert a literal string into a Text.
+-- | /O(n)/ Convert a literal string into a Text.  Subject to fusion.
 unpackCString# :: Addr# -> Text
 unpackCString# addr# = unstream (S.streamCString# addr#)
 {-# NOINLINE unpackCString# #-}
 
 {-# RULES "TEXT literal" forall a.
-    unstream (S.streamList (L.map safe (GHC.unpackCString# a)))
+    unstream (S.map safe (S.streamList (GHC.unpackCString# a)))
       = unpackCString# a #-}
 
 {-# RULES "TEXT literal UTF8" forall a.
-    unstream (S.streamList (L.map safe (GHC.unpackCStringUtf8# a)))
+    unstream (S.map safe (S.streamList (GHC.unpackCStringUtf8# a)))
       = unpackCString# a #-}
 
 -- | /O(1)/ Convert a character into a Text.  Subject to fusion.

Data/Text/Array.hs

 -- | Create an uninitialized mutable array.
 new :: forall s. Int -> ST s (MArray s)
 new n
-  | n < 0 || n .&. highBit /= 0 = error $ "Data.Text.Array.new: size overflow"
+  | n < 0 || n .&. highBit /= 0 = array_size_error
   | otherwise = ST $ \s1# ->
        case newByteArray# len# s1# of
          (# s2#, marr# #) -> (# s2#, MArray marr#
         highBit    = maxBound `xor` (maxBound `shiftR` 1)
 {-# INLINE new #-}
 
+array_size_error :: a
+array_size_error = error "Data.Text.Array.new: size overflow"
+
 -- | Freeze a mutable array. Do not mutate the 'MArray' afterwards!
 unsafeFreeze :: MArray s -> ST s Array
 unsafeFreeze MArray{..} = ST $ \s# ->
                  (marr,b) <- k
                  arr <- unsafeFreeze marr
                  return (arr,b))
+{-# INLINE run2 #-}
 
 -- | Copy some elements of a mutable array.
 copyM :: MArray s               -- ^ Destination

Data/Text/Encoding.hs

 {-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, MagicHash,
     UnliftedFFITypes #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Encoding
 -- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan,
     -- * Decoding ByteStrings to Text
     -- $strict
       decodeASCII
+    , decodeLatin1
     , decodeUtf8
     , decodeUtf16LE
     , decodeUtf16BE
 import Data.ByteString as B
 import Data.ByteString.Internal as B
 import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
-import Data.Text.Internal (Text(..))
+import Data.Text.Internal (Text(..), safe, textP)
 import Data.Text.Private (runText)
 import Data.Text.UnsafeChar (ord, unsafeWrite)
 import Data.Text.UnsafeShift (shiftL, shiftR)
 import Foreign.Ptr (Ptr, minusPtr, plusPtr)
 import Foreign.Storable (peek, poke)
 import GHC.Base (MutableByteArray#)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.Text.Array as A
 import qualified Data.Text.Encoding.Fusion as E
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Fusion as F
+import Data.Text.Unsafe (unsafeDupablePerformIO)
 
 -- $strict
 --
 -- | /Deprecated/.  Decode a 'ByteString' containing 7-bit ASCII
 -- encoded text.
 --
--- This function is deprecated.  Use 'decodeUtf8' instead.
+-- This function is deprecated.  Use 'decodeLatin1' instead.
 decodeASCII :: ByteString -> Text
 decodeASCII = decodeUtf8
 {-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
 
+-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
+--
+-- 'decodeLatin1' is semantically equivalent to
+--  @Data.Text.pack . Data.ByteString.Char8.unpack@
+decodeLatin1 :: ByteString -> Text
+decodeLatin1 (PS fp off len) = textP a 0 len
+ where
+  a = A.run (A.new len >>= unsafeIOToST . go)
+  go dest = withForeignPtr fp $ \ptr -> do
+    c_decode_latin1 (A.maBA dest) (ptr `plusPtr` off) (ptr `plusPtr` (off+len))
+    return dest
+
 -- | Decode a 'ByteString' containing UTF-8 encoded text.
 decodeUtf8With :: OnDecodeError -> ByteString -> Text
 decodeUtf8With onErr (PS fp off len) = runText $ \done -> do
                       Just c -> do
                         destOff <- peek destOffPtr
                         w <- unsafeSTToIO $
-                             unsafeWrite dest (fromIntegral destOff) c
+                             unsafeWrite dest (fromIntegral destOff) (safe c)
                         poke destOffPtr (destOff + fromIntegral w)
                         loop $ curPtr' `plusPtr` 1
           loop (ptr `plusPtr` off)
 -- If the input contains any invalid UTF-8 data, the relevant
 -- exception will be returned, otherwise the decoded text.
 decodeUtf8' :: ByteString -> Either UnicodeException Text
-decodeUtf8' = unsafePerformIO . try . evaluate . decodeUtf8With strictDecode
+decodeUtf8' = unsafeDupablePerformIO . try . evaluate . decodeUtf8With strictDecode
 {-# INLINE decodeUtf8' #-}
 
 -- | Encode text using UTF-8 encoding.
 encodeUtf8 :: Text -> ByteString
-encodeUtf8 (Text arr off len) = unsafePerformIO $ do
+encodeUtf8 (Text arr off len) = unsafeDupablePerformIO $ do
   let size0 = max len 4
   mallocByteString size0 >>= start size0 off 0
  where
 foreign import ccall unsafe "_hs_text_decode_utf8" c_decode_utf8
     :: MutableByteArray# s -> Ptr CSize
     -> Ptr Word8 -> Ptr Word8 -> IO (Ptr Word8)
+
+foreign import ccall unsafe "_hs_text_decode_latin1" c_decode_latin1
+    :: MutableByteArray# s -> Ptr Word8 -> Ptr Word8 -> IO ()

Data/Text/Encoding/Error.hs

 {-# LANGUAGE CPP, DeriveDataTypeable #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Encoding.Error
 -- Copyright   : (c) Bryan O'Sullivan 2009

Data/Text/Encoding/Fusion.hs

 import Data.Word (Word8, Word16, Word32)
 import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
 import Foreign.Storable (pokeByteOff)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Unsafe as B
 import qualified Data.Text.Encoding.Utf8 as U8
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Encoding.Utf32 as U32
+import Data.Text.Unsafe (unsafeDupablePerformIO)
 
 streamASCII :: ByteString -> Stream Char
 streamASCII bs = Stream next 0 (maxSize l)
 
 -- | /O(n)/ Convert a 'Stream' 'Word8' to a 'ByteString'.
 unstream :: Stream Word8 -> ByteString
-unstream (Stream next s0 len) = unsafePerformIO $ do
+unstream (Stream next s0 len) = unsafeDupablePerformIO $ do
     let mlen = upperBound 4 len
     mallocByteString mlen >>= loop mlen 0 s0
     where

Data/Text/Fusion/Common.hs

 
 -- | /O(n)/ Adds a character to the front of a Stream Char.
 cons :: Char -> Stream Char -> Stream Char
-cons w (Stream next0 s0 len) = Stream next (C1 s0) (len+1)
+cons !w (Stream next0 s0 len) = Stream next (C1 s0) (len+1)
     where
       next (C1 s) = Yield w (C0 s)
       next (C0 s) = case next0 s of
       loop_head !s = case next s of
                       Yield x _ -> x
                       Skip s'   -> loop_head s'
-                      Done      -> streamError "head" "Empty stream"
+                      Done      -> head_empty
 {-# INLINE [0] head #-}
 
+head_empty :: a
+head_empty = streamError "head" "Empty stream"
+{-# NOINLINE head_empty #-}
+
 -- | /O(1)/ Returns the first character and remainder of a 'Stream
 -- Char', or 'Nothing' if empty.  Subject to array fusion.
 uncons :: Stream Char -> Maybe (Char, Stream Char)
 {-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.IO
 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan,
     , appendFile
     -- * Operations on handles
     , hGetContents
+    , hGetChunk
     , hGetLine
     , hPutStr
     , hPutStrLn
 appendFile :: FilePath -> Text -> IO ()
 appendFile p = withFile p AppendMode . flip hPutStr
 
+catchError :: String -> Handle -> Handle__ -> IOError -> IO Text
+catchError caller h Handle__{..} err
+    | isEOFError err = do
+        buf <- readIORef haCharBuffer
+        return $ if isEmptyBuffer buf
+                 then T.empty
+                 else T.singleton '\r'
+    | otherwise = E.throwIO (augmentIOError err caller h)
+
+-- | /Experimental./ Read a single chunk of strict text from a
+-- 'Handle'. The size of the chunk depends on the amount of input
+-- currently buffered.
+--
+-- This function blocks only if there is no data available, and EOF
+-- has not yet been reached. Once EOF is reached, this function
+-- returns an empty string instead of throwing an exception.
+hGetChunk :: Handle -> IO Text
+hGetChunk h = wantReadableHandle "hGetChunk" h readSingleChunk
+ where
+  readSingleChunk hh@Handle__{..} = do
+    buf <- readIORef haCharBuffer
+    t <- readChunk hh buf `E.catch` catchError "hGetChunk" h hh
+    return (hh, t)
+
 -- | Read the remaining contents of a 'Handle' as a string.  The
 -- 'Handle' is closed once the contents have been read, or if an
 -- exception is thrown.
   wantReadableHandle "hGetContents" h readAll
  where
   readAll hh@Handle__{..} = do
-    let catchError e
-          | isEOFError e = do
-              buf <- readIORef haCharBuffer
-              return $ if isEmptyBuffer buf
-                       then T.empty
-                       else T.singleton '\r'
-          | otherwise = E.throwIO (augmentIOError e "hGetContents" h)
-        readChunks = do
+    let readChunks = do
           buf <- readIORef haCharBuffer
-          t <- readChunk hh buf `E.catch` catchError
+          t <- readChunk hh buf `E.catch` catchError "hGetContents" h hh
           if T.null t
             then return [t]
             else (t:) `fmap` readChunks

Data/Text/Internal.hs

 module Data.Text.Internal
     (
     -- * Types
+    -- $internals
       Text(..)
     -- * Construction
     , text
 
 -- | A space efficient, packed, unboxed Unicode text type.
 data Text = Text
-    {-# UNPACK #-} !A.Array          -- payload
-    {-# UNPACK #-} !Int              -- offset
-    {-# UNPACK #-} !Int              -- length
+    {-# UNPACK #-} !A.Array          -- payload (Word16 elements)
+    {-# UNPACK #-} !Int              -- offset (units of Word16, not Char)
+    {-# UNPACK #-} !Int              -- length (units of Word16, not Char)
     deriving (Typeable)
 
 -- | Smart constructor.
 -- scalar values, but are unfortunately admitted as valid 'Char'
 -- values by Haskell.  They cannot be represented in a 'Text'.  This
 -- function remaps those code points to the Unicode replacement
--- character \"&#xfffd;\", and leaves other code points unchanged.
+-- character (U+FFFD, \'&#xfffd;\'), and leaves other code points
+-- unchanged.
 safe :: Char -> Char
 safe c
     | ord c .&. 0x1ff800 /= 0xd800 = c
 firstf :: (a -> c) -> Maybe (a,b) -> Maybe (c,b)
 firstf f (Just (a, b)) = Just (f a, b)
 firstf _  Nothing      = Nothing
+
+-- $internals
+--
+-- Internally, the 'Text' type is represented as an array of 'Word16'
+-- UTF-16 code units. The offset and length fields in the constructor
+-- are in these units, /not/ units of 'Char'.
+--
+-- Invariants that all functions must maintain:
+--
+-- * Since the 'Text' type uses UTF-16 internally, it cannot represent
+--   characters in the reserved surrogate code point range U+D800 to
+--   U+DFFF. To maintain this invariant, the 'safe' function maps
+--   'Char' values in this range to the replacement character (U+FFFD,
+--   \'&#xfffd;\').
+--
+-- * A leading (or \"high\") surrogate code unit (0xD800–0xDBFF) must
+--   always be followed by a trailing (or \"low\") surrogate code unit
+--   (0xDC00-0xDFFF). A trailing surrogate code unit must always be
+--   preceded by a leading surrogate code unit.

Data/Text/Lazy.hs

 {-# OPTIONS_GHC -fno-warn-orphans #-}
 {-# LANGUAGE BangPatterns, MagicHash, CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Lazy
 -- Copyright   : (c) 2009, 2010, 2012 Bryan O'Sullivan

Data/Text/Lazy/Builder.hs

 -----------------------------------------------------------------------------
 -- |
 -- Module      : Data.Text.Lazy.Builder
--- Copyright   : (c) 2010 Johan Tibell
+-- Copyright   : (c) 2013 Bryan O'Sullivan
+--               (c) 2010 Johan Tibell
 -- License     : BSD3-style (see LICENSE)
 --
 -- Maintainer  : Johan Tibell <johan.tibell@gmail.com>
 -- @fromLazyText@, which construct new builders, and 'mappend', which
 -- concatenates two builders.
 --
--- To get maximum performance when building lazy @Text@ values using a builder, associate @mappend@ calls to the right.  For example, prefer
+-- To get maximum performance when building lazy @Text@ values using a
+-- builder, associate @mappend@ calls to the right.  For example,
+-- prefer
 --
 -- > singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
 --
    , flush
    ) where
 
-import Control.Monad.ST (ST, runST)
-import Data.Bits ((.&.))
-import Data.Monoid (Monoid(..))
-import Data.Text.Internal (Text(..))
-import Data.Text.Lazy.Internal (smallChunkSize)
-import Data.Text.Unsafe (inlineInterleaveST)
-import Data.Text.UnsafeChar (ord, unsafeWrite)
-import Data.Text.UnsafeShift (shiftR)
-import Prelude hiding (map, putChar)
-
-import qualified Data.String as String
-import qualified Data.Text as S
-import qualified Data.Text.Array as A
-import qualified Data.Text.Lazy as L
-
-------------------------------------------------------------------------
-
--- | A @Builder@ is an efficient way to build lazy @Text@ values.
--- There are several functions for constructing builders, but only one
--- to inspect them: to extract any data, you have to turn them into
--- lazy @Text@ values using @toLazyText@.
---
--- Internally, a builder constructs a lazy @Text@ by filling arrays
--- piece by piece.  As each buffer is filled, it is \'popped\' off, to
--- become a new chunk of the resulting lazy @Text@.  All this is
--- hidden from the user of the @Builder@.
-newtype Builder = Builder {
-     -- Invariant (from Data.Text.Lazy):
-     --      The lists include no null Texts.
-     runBuilder :: forall s. (Buffer s -> ST s [S.Text])
-                -> Buffer s
-                -> ST s [S.Text]
-   }
-
-instance Monoid Builder where
-   mempty  = empty
-   {-# INLINE mempty #-}
-   mappend = append
-   {-# INLINE mappend #-}
-   mconcat = foldr mappend mempty
-   {-# INLINE mconcat #-}
-
-instance String.IsString Builder where
-    fromString = fromString
-    {-# INLINE fromString #-}
-
-instance Show Builder where
-    show = show . toLazyText
-
-instance Eq Builder where
-    a == b = toLazyText a == toLazyText b
-
-instance Ord Builder where
-    a <= b = toLazyText a <= toLazyText b
-
-------------------------------------------------------------------------
-
--- | /O(1)./ The empty @Builder@, satisfying
---
---  * @'toLazyText' 'empty' = 'L.empty'@
---
-empty :: Builder
-empty = Builder (\ k buf -> k buf)
-{-# INLINE empty #-}
-
--- | /O(1)./ A @Builder@ taking a single character, satisfying
---
---  * @'toLazyText' ('singleton' c) = 'L.singleton' c@
---
-singleton :: Char -> Builder
-singleton c = writeAtMost 2 $ \ marr o ->
-    if n < 0x10000
-    then A.unsafeWrite marr o (fromIntegral n) >> return 1
-    else do
-        A.unsafeWrite marr o lo
-        A.unsafeWrite marr (o+1) hi
-        return 2
-  where n = ord c
-        m = n - 0x10000
-        lo = fromIntegral $ (m `shiftR` 10) + 0xD800
-        hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
-{-# INLINE singleton #-}
-
-------------------------------------------------------------------------
-
--- | /O(1)./ The concatenation of two builders, an associative
--- operation with identity 'empty', satisfying
---
---  * @'toLazyText' ('append' x y) = 'L.append' ('toLazyText' x) ('toLazyText' y)@
---
-append :: Builder -> Builder -> Builder
-append (Builder f) (Builder g) = Builder (f . g)
-{-# INLINE [0] append #-}
-
--- TODO: Experiment to find the right threshold.
-copyLimit :: Int
-copyLimit = 128
-
--- This function attempts to merge small @Text@ values instead of
--- treating each value as its own chunk.  We may not always want this.
-
--- | /O(1)./ A @Builder@ taking a 'S.Text', satisfying
---
---  * @'toLazyText' ('fromText' t) = 'L.fromChunks' [t]@
---
-fromText :: S.Text -> Builder
-fromText t@(Text arr off l)
-    | S.null t       = empty
-    | l <= copyLimit = writeN l $ \marr o -> A.copyI marr o arr off (l+o)
-    | otherwise      = flush `append` mapBuilder (t :)
-{-# INLINE [1] fromText #-}
-
-{-# RULES
-"fromText/pack" forall s .
-        fromText (S.pack s) = fromString s
- #-}
-
--- | /O(1)./ A Builder taking a @String@, satisfying
---
---  * @'toLazyText' ('fromString' s) = 'L.fromChunks' [S.pack s]@
---
-fromString :: String -> Builder
-fromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
-    let loop !marr !o !u !l [] = k (Buffer marr o u l)
-        loop marr o u l s@(c:cs)
-            | l <= 1 = do
-                arr <- A.unsafeFreeze marr
-                let !t = Text arr o u
-                marr' <- A.new chunkSize
-                ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
-                return $ t : ts
-            | otherwise = do
-                n <- unsafeWrite marr (o+u) c
-                loop marr o (u+n) (l-n) cs
-    in loop p0 o0 u0 l0 str
-  where
-    chunkSize = smallChunkSize
-{-# INLINE fromString #-}
-
--- | /O(1)./ A @Builder@ taking a lazy @Text@, satisfying
---
---  * @'toLazyText' ('fromLazyText' t) = t@
---
-fromLazyText :: L.Text -> Builder
-fromLazyText ts = flush `append` mapBuilder (L.toChunks ts ++)
-{-# INLINE fromLazyText #-}
-
-------------------------------------------------------------------------
-
--- Our internal buffer type
-data Buffer s = Buffer {-# UNPACK #-} !(A.MArray s)
-                       {-# UNPACK #-} !Int  -- offset
-                       {-# UNPACK #-} !Int  -- used units
-                       {-# UNPACK #-} !Int  -- length left
-
-------------------------------------------------------------------------
-
--- | /O(n)./ Extract a lazy @Text@ from a @Builder@ with a default
--- buffer size.  The construction work takes place if and when the
--- relevant part of the lazy @Text@ is demanded.
-toLazyText :: Builder -> L.Text
-toLazyText = toLazyTextWith smallChunkSize
-
--- | /O(n)./ Extract a lazy @Text@ from a @Builder@, using the given
--- size for the initial buffer.  The construction work takes place if
--- and when the relevant part of the lazy @Text@ is demanded.
---
--- If the initial buffer is too small to hold all data, subsequent
--- buffers will be the default buffer size.
-toLazyTextWith :: Int -> Builder -> L.Text
-toLazyTextWith chunkSize m = L.fromChunks (runST $
-  newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return [])))
-
--- | /O(1)./ Pop the strict @Text@ we have constructed so far, if any,
--- yielding a new chunk in the result lazy @Text@.
-flush :: Builder
-flush = Builder $ \ k buf@(Buffer p o u l) ->
-    if u == 0
-    then k buf
-    else do arr <- A.unsafeFreeze p
-            let !b = Buffer p (o+u) 0 l
-                !t = Text arr o u
-            ts <- inlineInterleaveST (k b)
-            return $! t : ts
-
-------------------------------------------------------------------------
-
--- | Sequence an ST operation on the buffer
-withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder
-withBuffer f = Builder $ \k buf -> f buf >>= k
-{-# INLINE withBuffer #-}
-
--- | Get the size of the buffer
-withSize :: (Int -> Builder) -> Builder
-withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
-    runBuilder (f l) k buf
-{-# INLINE withSize #-}
-
--- | Map the resulting list of texts.
-mapBuilder :: ([S.Text] -> [S.Text]) -> Builder
-mapBuilder f = Builder (fmap f .)
-
-------------------------------------------------------------------------
-
--- | Ensure that there are at least @n@ many elements available.
-ensureFree :: Int -> Builder
-ensureFree !n = withSize $ \ l ->
-    if n <= l
-    then empty
-    else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize)))
-{-# INLINE [0] ensureFree #-}
-
-writeAtMost :: Int -> (forall s. A.MArray s -> Int -> ST s Int) -> Builder
-writeAtMost n f = ensureFree n `append'` withBuffer (writeBuffer f)
-{-# INLINE [0] writeAtMost #-}
-
--- | Ensure that @n@ many elements are available, and then use @f@ to
--- write some elements into the memory.
-writeN :: Int -> (forall s. A.MArray s -> Int -> ST s ()) -> Builder
-writeN n f = writeAtMost n (\ p o -> f p o >> return n)
-{-# INLINE writeN #-}
-
-writeBuffer :: (A.MArray s -> Int -> ST s Int) -> Buffer s -> ST s (Buffer s)
-writeBuffer f (Buffer p o u l) = do
-    n <- f p (o+u)
-    return $! Buffer p o (u+n) (l-n)
-{-# INLINE writeBuffer #-}
-
-newBuffer :: Int -> ST s (Buffer s)
-newBuffer size = do
-    arr <- A.new size
-    return $! Buffer arr 0 0 size
-{-# INLINE newBuffer #-}
-
-------------------------------------------------------------------------
--- Some nice rules for Builder
-
--- This function makes GHC understand that 'writeN' and 'ensureFree'
--- are *not* recursive in the precense of the rewrite rules below.
--- This is not needed with GHC 7+.
-append' :: Builder -> Builder -> Builder
-append' (Builder f) (Builder g) = Builder (f . g)
-{-# INLINE append' #-}
-
-{-# RULES
-
-"append/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
-                           (g::forall s. A.MArray s -> Int -> ST s Int) ws.
-    append (writeAtMost a f) (append (writeAtMost b g) ws) =
-        append (writeAtMost (a+b) (\marr o -> f marr o >>= \ n ->
-                                    g marr (o+n) >>= \ m ->
-                                    let s = n+m in s `seq` return s)) ws
-
-"writeAtMost/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
-                           (g::forall s. A.MArray s -> Int -> ST s Int).
-    append (writeAtMost a f) (writeAtMost b g) =
-        writeAtMost (a+b) (\marr o -> f marr o >>= \ n ->
-                            g marr (o+n) >>= \ m ->
-                            let s = n+m in s `seq` return s)
-
-"ensureFree/ensureFree" forall a b .
-    append (ensureFree a) (ensureFree b) = ensureFree (max a b)
-
-"flush/flush"
-    append flush flush = flush
-
- #-}
+import Data.Text.Lazy.Builder.Internal

Data/Text/Lazy/Builder/Int.hs

-{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, UnboxedTuples #-}
 
 -- Module:      Data.Text.Lazy.Builder.Int
--- Copyright:   (c) 2011 MailRank, Inc.
+-- Copyright:   (c) 2013 Bryan O'Sullivan
+--              (c) 2011 MailRank, Inc.
 -- License:     BSD3
 -- Maintainer:  Bryan O'Sullivan <bos@serpentine.com>
 -- Stability:   experimental
 
 import Data.Int (Int8, Int16, Int32, Int64)
 import Data.Monoid (mempty)
+import qualified Data.ByteString.Unsafe as B
 import Data.Text.Lazy.Builder.Functions ((<>), i2d)
-import Data.Text.Lazy.Builder
+import Data.Text.Lazy.Builder.Internal
+import Data.Text.Lazy.Builder.Int.Digits (digits)
+import Data.Text.Array
 import Data.Word (Word, Word8, Word16, Word32, Word64)
 import GHC.Base (quotInt, remInt)
 import GHC.Num (quotRemInteger)
 import GHC.Types (Int(..))
+import Control.Monad.ST
 
 #ifdef  __GLASGOW_HASKELL__
 # if __GLASGOW_HASKELL__ < 611
 {-# RULES "decimal/Word32" decimal = positive :: Word32 -> Builder #-}
 {-# RULES "decimal/Word64" decimal = positive :: Word64 -> Builder #-}
 {-# RULES "decimal/Integer" decimal = integer 10 :: Integer -> Builder #-}
-decimal i
-  | i < 0     = singleton '-' <>
-                if i <= -128
-                then positive (-(i `quot` 10)) <> digit (-(i `rem` 10))
-                else positive (-i)
-  | otherwise = positive i
+decimal i = decimal' (<= -128) i
 
 boundedDecimal :: (Integral a, Bounded a) => a -> Builder
 {-# SPECIALIZE boundedDecimal :: Int -> Builder #-}
 {-# SPECIALIZE boundedDecimal :: Int16 -> Builder #-}
 {-# SPECIALIZE boundedDecimal :: Int32 -> Builder #-}
 {-# SPECIALIZE boundedDecimal :: Int64 -> Builder #-}
-boundedDecimal i
-    | i < 0     = singleton '-' <>
-                  if i == minBound
-                  then positive (-(i `quot` 10)) <> digit (-(i `rem` 10))
-                  else positive (-i)
+boundedDecimal i = decimal' (== minBound) i
+
+decimal' :: (Integral a) => (a -> Bool) -> a -> Builder
+{-# INLINE decimal' #-}
+decimal' p i
+    | i < 0 = if p i
+              then let j = -(i `quot` 10)
+                       !n = countDigits j
+                   in writeN (n + 2) $ \marr off -> do
+                       unsafeWrite marr off minus
+                       posDecimal marr (off+1) n j
+                       unsafeWrite marr (off+n+1) (i2w (-(i `rem` 10)))
+              else let j = -i
+                       !n = countDigits j
+                   in writeN (n + 1) $ \marr off ->
+                       unsafeWrite marr off minus >> posDecimal marr (off+1) n j
     | otherwise = positive i
 
 positive :: (Integral a) => a -> Builder
 {-# SPECIALIZE positive :: Word16 -> Builder #-}
 {-# SPECIALIZE positive :: Word32 -> Builder #-}
 {-# SPECIALIZE positive :: Word64 -> Builder #-}
-positive = go
-  where go n | n < 10    = digit n
-             | otherwise = go (n `quot` 10) <> digit (n `rem` 10)
+positive i
+    | i < 10    = writeN 1 $ \marr off -> unsafeWrite marr off (i2w i)
+    | otherwise = let !n = countDigits i
+                  in writeN n $ \marr off -> posDecimal marr off n i
+
+posDecimal :: (Integral a) =>
+              forall s. MArray s -> Int -> Int -> a -> ST s ()
+{-# INLINE posDecimal #-}
+posDecimal marr off0 ds v0 = go (off0 + ds - 1) v0
+  where go off v
+           | v >= 100 = do
+               write2 off $ let u = v `rem` 100
+                            in u + u
+               go (off - 2) (v `quot` 100)
+           | v < 10    = unsafeWrite marr off (i2w v)
+           | otherwise = write2 off (v + v)
+        write2 off i = do
+          unsafeWrite marr off $ get (i + 1)
+          unsafeWrite marr (off - 1) $ get i
+        get i = fromIntegral $ B.unsafeIndex digits (fromIntegral i) :: Word16
+
+minus, zero :: Word16
+{-# INLINE minus #-}
+{-# INLINE zero #-}
+minus = 45
+zero = 48
+
+i2w :: (Integral a) => a -> Word16
+{-# INLINE i2w #-}
+i2w v = zero + fromIntegral v
+
+countDigits :: (Integral a) => a -> Int
+{-# INLINE countDigits #-}
+countDigits v0 = go 1 (fromIntegral v0 :: Word64)
+  where go !k v
+           | v < 10    = k
+           | v < 100   = k + 1
+           | v < 1000  = k + 2
+           | v < 1000000000000 =
+               k + if v < 100000000
+                   then if v < 1000000
+                        then if v < 10000
+                             then 3
+                             else 4 + fin v 100000
+                        else 6 + fin v 10000000
+                   else if v < 10000000000
+                        then 8 + fin v 1000000000
+                        else 10 + fin v 100000000000
+           | otherwise = go (k + 12) (v `quot` 1000000000000)
+        fin v n = if v >= n then 1 else 0
 
 hexadecimal :: Integral a => a -> Builder
 {-# SPECIALIZE hexadecimal :: Int -> Builder #-}

Data/Text/Lazy/Builder/Int/Digits.hs

+{-# LANGUAGE OverloadedStrings #-}
+
+-- Module:      Data.Text.Lazy.Builder.Int.Digits
+-- Copyright:   (c) 2013 Bryan O'Sullivan
+-- License:     BSD3
+-- Maintainer:  Bryan O'Sullivan <bos@serpentine.com>
+-- Stability:   experimental
+-- Portability: portable
+--
+-- This module exists because the C preprocessor does things that we
+-- shall not speak of when confronted with Haskell multiline strings.
+
+module Data.Text.Lazy.Builder.Int.Digits (digits) where
+
+import Data.ByteString.Char8 (ByteString)
+
+digits :: ByteString
+digits = "0001020304050607080910111213141516171819\
+         \2021222324252627282930313233343536373839\
+         \4041424344454647484950515253545556575859\
+         \6061626364656667686970717273747576777879\
+         \8081828384858687888990919293949596979899"

Data/Text/Lazy/Builder/Internal.hs

+{-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Text.Lazy.Builder.Internal
+-- Copyright   : (c) 2013 Bryan O'Sullivan
+--               (c) 2010 Johan Tibell
+-- License     : BSD3-style (see LICENSE)
+--
+-- Maintainer  : Johan Tibell <johan.tibell@gmail.com>
+-- Stability   : experimental
+-- Portability : portable to Hugs and GHC
+--
+-- Efficient construction of lazy @Text@ values.  The principal
+-- operations on a @Builder@ are @singleton@, @fromText@, and
+-- @fromLazyText@, which construct new builders, and 'mappend', which
+-- concatenates two builders.
+--
+-- To get maximum performance when building lazy @Text@ values using a
+-- builder, associate @mappend@ calls to the right.  For example,
+-- prefer
+--
+-- > singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
+--
+-- to
+--
+-- > singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'
+--
+-- as the latter associates @mappend@ to the left.
+--
+-----------------------------------------------------------------------------
+
+module Data.Text.Lazy.Builder.Internal
+   ( -- * Public API
+     -- ** The Builder type
+     Builder
+   , toLazyText
+   , toLazyTextWith
+
+     -- ** Constructing Builders
+   , singleton
+   , fromText
+   , fromLazyText
+   , fromString
+
+     -- ** Flushing the buffer state
+   , flush
+
+     -- * Internal functions
+   , append'
+   , ensureFree
+   , writeN
+   ) where
+
+import Control.Monad.ST (ST, runST)
+import Data.Bits ((.&.))
+import Data.Monoid (Monoid(..))
+import Data.Text.Internal (Text(..))
+import Data.Text.Lazy.Internal (smallChunkSize)
+import Data.Text.Unsafe (inlineInterleaveST)
+import Data.Text.UnsafeChar (ord, unsafeWrite)
+import Data.Text.UnsafeShift (shiftR)
+import Prelude hiding (map, putChar)
+
+import qualified Data.String as String
+import qualified Data.Text as S
+import qualified Data.Text.Array as A
+import qualified Data.Text.Lazy as L
+
+------------------------------------------------------------------------
+
+-- | A @Builder@ is an efficient way to build lazy @Text@ values.
+-- There are several functions for constructing builders, but only one
+-- to inspect them: to extract any data, you have to turn them into
+-- lazy @Text@ values using @toLazyText@.
+--
+-- Internally, a builder constructs a lazy @Text@ by filling arrays
+-- piece by piece.  As each buffer is filled, it is \'popped\' off, to
+-- become a new chunk of the resulting lazy @Text@.  All this is
+-- hidden from the user of the @Builder@.
+newtype Builder = Builder {
+     -- Invariant (from Data.Text.Lazy):
+     --      The lists include no null Texts.
+     runBuilder :: forall s. (Buffer s -> ST s [S.Text])
+                -> Buffer s
+                -> ST s [S.Text]
+   }
+
+instance Monoid Builder where
+   mempty  = empty
+   {-# INLINE mempty #-}
+   mappend = append
+   {-# INLINE mappend #-}
+   mconcat = foldr mappend mempty
+   {-# INLINE mconcat #-}
+
+instance String.IsString Builder where
+    fromString = fromString
+    {-# INLINE fromString #-}
+
+instance Show Builder where
+    show = show . toLazyText
+
+instance Eq Builder where
+    a == b = toLazyText a == toLazyText b
+
+instance Ord Builder where
+    a <= b = toLazyText a <= toLazyText b
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The empty @Builder@, satisfying
+--
+--  * @'toLazyText' 'empty' = 'L.empty'@
+--
+empty :: Builder
+empty = Builder (\ k buf -> k buf)
+{-# INLINE empty #-}
+
+-- | /O(1)./ A @Builder@ taking a single character, satisfying
+--
+--  * @'toLazyText' ('singleton' c) = 'L.singleton' c@
+--
+singleton :: Char -> Builder
+singleton c = writeAtMost 2 $ \ marr o ->
+    if n < 0x10000
+    then A.unsafeWrite marr o (fromIntegral n) >> return 1
+    else do
+        A.unsafeWrite marr o lo
+        A.unsafeWrite marr (o+1) hi
+        return 2
+  where n = ord c
+        m = n - 0x10000
+        lo = fromIntegral $ (m `shiftR` 10) + 0xD800
+        hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
+{-# INLINE singleton #-}
+
+------------------------------------------------------------------------
+
+-- | /O(1)./ The concatenation of two builders, an associative
+-- operation with identity 'empty', satisfying
+--
+--  * @'toLazyText' ('append' x y) = 'L.append' ('toLazyText' x) ('toLazyText' y)@
+--
+append :: Builder -> Builder -> Builder
+append (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE [0] append #-}
+
+-- TODO: Experiment to find the right threshold.
+copyLimit :: Int
+copyLimit = 128
+
+-- This function attempts to merge small @Text@ values instead of
+-- treating each value as its own chunk.  We may not always want this.
+
+-- | /O(1)./ A @Builder@ taking a 'S.Text', satisfying
+--
+--  * @'toLazyText' ('fromText' t) = 'L.fromChunks' [t]@
+--
+fromText :: S.Text -> Builder
+fromText t@(Text arr off l)
+    | S.null t       = empty
+    | l <= copyLimit = writeN l $ \marr o -> A.copyI marr o arr off (l+o)
+    | otherwise      = flush `append` mapBuilder (t :)
+{-# INLINE [1] fromText #-}
+
+{-# RULES
+"fromText/pack" forall s .
+        fromText (S.pack s) = fromString s
+ #-}
+
+-- | /O(1)./ A Builder taking a @String@, satisfying
+--
+--  * @'toLazyText' ('fromString' s) = 'L.fromChunks' [S.pack s]@
+--
+fromString :: String -> Builder
+fromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
+    let loop !marr !o !u !l [] = k (Buffer marr o u l)
+        loop marr o u l s@(c:cs)
+            | l <= 1 = do
+                arr <- A.unsafeFreeze marr
+                let !t = Text arr o u
+                marr' <- A.new chunkSize
+                ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
+                return $ t : ts
+            | otherwise = do
+                n <- unsafeWrite marr (o+u) c
+                loop marr o (u+n) (l-n) cs
+    in loop p0 o0 u0 l0 str
+  where
+    chunkSize = smallChunkSize
+{-# INLINE fromString #-}
+
+-- | /O(1)./ A @Builder@ taking a lazy @Text@, satisfying
+--
+--  * @'toLazyText' ('fromLazyText' t) = t@
+--
+fromLazyText :: L.Text -> Builder
+fromLazyText ts = flush `append` mapBuilder (L.toChunks ts ++)
+{-# INLINE fromLazyText #-}
+
+------------------------------------------------------------------------
+
+-- Our internal buffer type
+data Buffer s = Buffer {-# UNPACK #-} !(A.MArray s)
+                       {-# UNPACK #-} !Int  -- offset
+                       {-# UNPACK #-} !Int  -- used units
+                       {-# UNPACK #-} !Int  -- length left
+
+------------------------------------------------------------------------
+
+-- | /O(n)./ Extract a lazy @Text@ from a @Builder@ with a default
+-- buffer size.  The construction work takes place if and when the
+-- relevant part of the lazy @Text@ is demanded.
+toLazyText :: Builder -> L.Text
+toLazyText = toLazyTextWith smallChunkSize
+
+-- | /O(n)./ Extract a lazy @Text@ from a @Builder@, using the given
+-- size for the initial buffer.  The construction work takes place if
+-- and when the relevant part of the lazy @Text@ is demanded.
+--
+-- If the initial buffer is too small to hold all data, subsequent
+-- buffers will be the default buffer size.
+toLazyTextWith :: Int -> Builder -> L.Text
+toLazyTextWith chunkSize m = L.fromChunks (runST $
+  newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return [])))
+
+-- | /O(1)./ Pop the strict @Text@ we have constructed so far, if any,
+-- yielding a new chunk in the result lazy @Text@.
+flush :: Builder
+flush = Builder $ \ k buf@(Buffer p o u l) ->
+    if u == 0
+    then k buf
+    else do arr <- A.unsafeFreeze p
+            let !b = Buffer p (o+u) 0 l
+                !t = Text arr o u
+            ts <- inlineInterleaveST (k b)
+            return $! t : ts
+
+------------------------------------------------------------------------
+
+-- | Sequence an ST operation on the buffer
+withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder
+withBuffer f = Builder $ \k buf -> f buf >>= k
+{-# INLINE withBuffer #-}
+
+-- | Get the size of the buffer
+withSize :: (Int -> Builder) -> Builder
+withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
+    runBuilder (f l) k buf
+{-# INLINE withSize #-}
+
+-- | Map the resulting list of texts.
+mapBuilder :: ([S.Text] -> [S.Text]) -> Builder
+mapBuilder f = Builder (fmap f .)
+
+------------------------------------------------------------------------
+
+-- | Ensure that there are at least @n@ many elements available.
+ensureFree :: Int -> Builder
+ensureFree !n = withSize $ \ l ->
+    if n <= l
+    then empty
+    else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize)))
+{-# INLINE [0] ensureFree #-}
+
+writeAtMost :: Int -> (forall s. A.MArray s -> Int -> ST s Int) -> Builder
+writeAtMost n f = ensureFree n `append'` withBuffer (writeBuffer f)
+{-# INLINE [0] writeAtMost #-}
+
+-- | Ensure that @n@ many elements are available, and then use @f@ to
+-- write some elements into the memory.
+writeN :: Int -> (forall s. A.MArray s -> Int -> ST s ()) -> Builder
+writeN n f = writeAtMost n (\ p o -> f p o >> return n)
+{-# INLINE writeN #-}
+
+writeBuffer :: (A.MArray s -> Int -> ST s Int) -> Buffer s -> ST s (Buffer s)
+writeBuffer f (Buffer p o u l) = do
+    n <- f p (o+u)
+    return $! Buffer p o (u+n) (l-n)
+{-# INLINE writeBuffer #-}
+
+newBuffer :: Int -> ST s (Buffer s)
+newBuffer size = do
+    arr <- A.new size
+    return $! Buffer arr 0 0 size
+{-# INLINE newBuffer #-}
+
+------------------------------------------------------------------------
+-- Some nice rules for Builder
+
+-- This function makes GHC understand that 'writeN' and 'ensureFree'
+-- are *not* recursive in the precense of the rewrite rules below.
+-- This is not needed with GHC 7+.
+append' :: Builder -> Builder -> Builder
+append' (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE append' #-}
+
+{-# RULES
+
+"append/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
+                           (g::forall s. A.MArray s -> Int -> ST s Int) ws.
+    append (writeAtMost a f) (append (writeAtMost b g) ws) =
+        append (writeAtMost (a+b) (\marr o -> f marr o >>= \ n ->
+                                    g marr (o+n) >>= \ m ->
+                                    let s = n+m in s `seq` return s)) ws
+
+"writeAtMost/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
+                           (g::forall s. A.MArray s -> Int -> ST s Int).
+    append (writeAtMost a f) (writeAtMost b g) =
+        writeAtMost (a+b) (\marr o -> f marr o >>= \ n ->
+                            g marr (o+n) >>= \ m ->
+                            let s = n+m in s `seq` return s)
+
+"ensureFree/ensureFree" forall a b .
+    append (ensureFree a) (ensureFree b) = ensureFree (max a b)
+
+"flush/flush"
+    append flush flush = flush
+
+ #-}

Data/Text/Lazy/Encoding.hs

-{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BangPatterns,CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Lazy.Encoding
 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
     -- * Decoding ByteStrings to Text
     -- $strict
       decodeASCII
+    , decodeLatin1
     , decodeUtf8
     , decodeUtf16LE
     , decodeUtf16BE
 import Data.Bits ((.&.))
 import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
 import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldrChunks)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.ByteString as S
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Internal as B
 import qualified Data.Text.Encoding as TE
 import qualified Data.Text.Lazy.Encoding.Fusion as E
 import qualified Data.Text.Lazy.Fusion as F
+import Data.Text.Unsafe (unsafeDupablePerformIO)
 
 -- $strict
 --
 -- | /Deprecated/.  Decode a 'ByteString' containing 7-bit ASCII
 -- encoded text.
 --
--- This function is deprecated.  Use 'decodeUtf8' instead.
+-- This function is deprecated.  Use 'decodeLatin1' instead.
 decodeASCII :: B.ByteString -> Text
 decodeASCII = decodeUtf8
 {-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
 
+-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
+decodeLatin1 :: B.ByteString -> Text
+decodeLatin1 = foldr (chunk . TE.decodeLatin1) empty . B.toChunks
+
 -- | Decode a 'ByteString' containing UTF-8 encoded text.
 decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
 decodeUtf8With onErr bs0 = fast bs0
 -- input before it can return a result.  If you need lazy (streaming)
 -- decoding, use 'decodeUtf8With' in lenient mode.
 decodeUtf8' :: B.ByteString -> Either UnicodeException Text
-decodeUtf8' bs = unsafePerformIO $ do
+decodeUtf8' bs = unsafeDupablePerformIO $ do
                    let t = decodeUtf8 bs
                    try (evaluate (rnf t `seq` t))
   where

Data/Text/Lazy/Encoding/Fusion.hs

 import qualified Data.Text.Encoding.Utf8 as U8
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Encoding.Utf32 as U32
-import System.IO.Unsafe (unsafePerformIO)
+import Data.Text.Unsafe (unsafeDupablePerformIO)
 import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
 import Foreign.Storable (pokeByteOff)
 import Data.ByteString.Internal (mallocByteString, memcpy)
 -- | /O(n)/ Convert a 'Stream' 'Word8' to a lazy 'ByteString'.
 unstreamChunks :: Int -> Stream Word8 -> ByteString
 unstreamChunks chunkSize (Stream next s0 len0) = chunk s0 (upperBound 4 len0)
-  where chunk s1 len1 = unsafePerformIO $ do
+  where chunk s1 len1 = unsafeDupablePerformIO $ do
           let len = max 4 (min len1 chunkSize)
           mallocByteString len >>= loop len 0 s1
           where

Data/Text/Lazy/Fusion.hs

 
 import Prelude hiding (length)
 import qualified Data.Text.Fusion.Common as S
+import Control.Monad.ST (runST)
 import Data.Text.Fusion.Internal
 import Data.Text.Fusion.Size (isEmpty, unknownSize)
 import Data.Text.Lazy.Internal
         where Iter c d = iter t i
 {-# INLINE [0] stream #-}
 
-data UC s = UC s {-# UNPACK #-} !Int
-
 -- | /O(n)/ Convert a 'Stream Char' into a 'Text', using the given
 -- chunk size.
 unstreamChunks :: Int -> Stream Char -> Text
-unstreamChunks chunkSize (Stream next s0 len0)
+unstreamChunks !chunkSize (Stream next s0 len0)
   | isEmpty len0 = Empty
   | otherwise    = outer s0
   where
-    outer s = {-# SCC "unstreamChunks/outer" #-}
-              case next s of
+    outer so = {-# SCC "unstreamChunks/outer" #-}
+              case next so of
                 Done       -> Empty
                 Skip s'    -> outer s'
-                Yield x s' -> I.Text arr 0 len `chunk` outer s''
-                  where (arr, UC s'' len) = A.run2 fill
-                        fill = do a <- A.new unknownLength
-                                  unsafeWrite a 0 x >>= inner a unknownLength s'
-                        unknownLength = 4
-    inner marr len s !i
-        | i + 1 >= chunkSize = return (marr, UC s i)
-        | i + 1 >= len       = {-# SCC "unstreamChunks/resize" #-} do
-            let newLen = min (len `shiftL` 1) chunkSize
-            marr' <- A.new newLen
-            A.copyM marr' 0 marr 0 len
-            inner marr' newLen s i
-        | otherwise =
-            {-# SCC "unstreamChunks/inner" #-}
-            case next s of
-              Done        -> return (marr, UC s i)
-              Skip s'     -> inner marr len s' i
-              Yield x s'  -> do d <- unsafeWrite marr i x
-                                inner marr len s' (i+d)
+                Yield x s' -> runST $ do
+                                a <- A.new unknownLength
+                                unsafeWrite a 0 x >>= inner a unknownLength s'
+                    where unknownLength = 4
+      where
+        inner marr !len s !i
+            | i + 1 >= chunkSize = finish marr i s
+            | i + 1 >= len       = {-# SCC "unstreamChunks/resize" #-} do
+                let newLen = min (len `shiftL` 1) chunkSize
+                marr' <- A.new newLen
+                A.copyM marr' 0 marr 0 len
+                inner marr' newLen s i
+            | otherwise =
+                {-# SCC "unstreamChunks/inner" #-}
+                case next s of
+                  Done        -> finish marr i s
+                  Skip s'     -> inner marr len s' i
+                  Yield x s'  -> do d <- unsafeWrite marr i x
+                                    inner marr len s' (i+d)
+        finish marr len s' = do
+          arr <- A.unsafeFreeze marr
+          return (I.Text arr 0 len `Chunk` outer s')
 {-# INLINE [0] unstreamChunks #-}
 
 -- | /O(n)/ Convert a 'Stream Char' into a 'Text', using

Data/Text/Lazy/IO.hs

 {-# LANGUAGE BangPatterns, CPP, RecordWildCards #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Lazy.IO
 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan,

Data/Text/Lazy/Read.hs

-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- |
 -- Module      : Data.Text.Lazy.Read

Data/Text/Read.hs

-{-# LANGUAGE OverloadedStrings, UnboxedTuples #-}
+{-# LANGUAGE OverloadedStrings, UnboxedTuples, CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- |
 -- Module      : Data.Text.Read

Data/Text/Search.hs

               delta | nextInPattern = nlen + 1
                     | c == z        = skip + 1
                     | otherwise     = 1
-              nextInPattern         = mask .&. swizzle (hindex' (i+nlen)) == 0
+                where nextInPattern = mask .&. swizzle (hindex' (i+nlen)) == 0
               !(mask :* skip)       = buildTable 0 0 (nlen-2)
     scanOne c = loop 0
         where loop !i | i >= hlen     = []

Data/Text/Unsafe.hs

     (
       inlineInterleaveST
     , inlinePerformIO
+    , unsafeDupablePerformIO
     , Iter(..)
     , iter
     , iter_
 import Data.Text.Unsafe.Base (inlineInterleaveST, inlinePerformIO)
 import Data.Text.UnsafeChar (unsafeChr)
 import qualified Data.Text.Array as A
+#if __GLASGOW_HASKELL__ >= 611
+import GHC.IO (unsafeDupablePerformIO)
+#else
+import GHC.IOBase (unsafeDupablePerformIO)
+#endif
 
 -- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'
 -- omits the check for the empty case, so there is an obligation on
Add a comment to this file

benchmarks/haskell/Benchmarks/Builder.hs

File contents unchanged.

Add a comment to this file

benchmarks/haskell/Benchmarks/Pure.hs

File contents unchanged.

Add a comment to this file

benchmarks/text-benchmarks.cabal

File contents unchanged.

 }
 
 /*
+ * The ISO 8859-1 (aka latin-1) code points correspond exactly to the first 256 unicode
+ * code-points, therefore we can trivially convert from a latin-1 encoded bytestring to
+ * an UTF16 array
+ */
+void
+_hs_text_decode_latin1(uint16_t *dest, const uint8_t const *src,
+                       const uint8_t const *srcend)
+{
+  const uint8_t *p = src;
+
+#if defined(__i386__) || defined(__x86_64__)
+  /* This optimization works on a little-endian systems by using
+     (aligned) 32-bit loads instead of 8-bit loads
+   */
+
+  /* consume unaligned prefix */
+  while (p != srcend && (uintptr_t)p & 0x3)
+    *dest++ = *p++;
+
+  /* iterate over 32-bit aligned loads */
+  while (p < srcend - 3) {
+    const uint32_t w = *((const uint32_t *)p);
+
+    *dest++ =  w        & 0xff;
+    *dest++ = (w >> 8)  & 0xff;
+    *dest++ = (w >> 16) & 0xff;
+    *dest++ = (w >> 24) & 0xff;
+
+    p += 4;
+  }
+#endif
+
+  /* handle unaligned suffix */
+  while (p != srcend)
+    *dest++ = *p++;
+}
+
+/*
  * A best-effort decoder. Runs until it hits either end of input or
  * the start of an invalid byte sequence.
  *

tests/Tests/Properties.hs

 import Test.Framework.Providers.QuickCheck2 (testProperty)
 import qualified Data.Bits as Bits (shiftL, shiftR)
 import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
 import qualified Data.List as L
 import qualified Data.Text as T
 import qualified Data.Text.Encoding as E
 tl_chunk_unchunk    = (TL.fromChunks . TL.toChunks) `eq` id
 tl_from_to_strict   = (TL.fromStrict . TL.toStrict) `eq` id
 
+-- Note: this silently truncates code-points > 255 to 8-bit due to 'B.pack'
+encodeL1 :: T.Text -> B.ByteString
+encodeL1 = B.pack . map (fromIntegral . fromEnum) . T.unpack
+encodeLazyL1 :: TL.Text -> BL.ByteString
+encodeLazyL1 = BL.fromChunks . map encodeL1 . TL.toChunks
+
 t_ascii t    = E.decodeASCII (E.encodeUtf8 a) == a
     where a  = T.map (\c -> chr (ord c `mod` 128)) t
 tl_ascii t   = EL.decodeASCII (EL.encodeUtf8 a) == a
     where a  = TL.map (\c -> chr (ord c `mod` 128)) t
+t_latin1 t   = E.decodeLatin1 (encodeL1 a) == a
+    where a  = T.map (\c -> chr (ord c `mod` 256)) t
+tl_latin1 t  = EL.decodeLatin1 (encodeLazyL1 a) == a
+    where a  = TL.map (\c -> chr (ord c `mod` 256)) t
 t_utf8       = forAll genUnicode $ (E.decodeUtf8 . E.encodeUtf8) `eq` id
 t_utf8'      = forAll genUnicode $ (E.decodeUtf8' . E.encodeUtf8) `eq` (id . Right)
 tl_utf8      = forAll genUnicode $ (EL.decodeUtf8 . EL.encodeUtf8) `eq` id
     testGroup "transcoding" [
       testProperty "t_ascii" t_ascii,
       testProperty "tl_ascii" tl_ascii,
+      testProperty "t_latin1" t_latin1,
+      testProperty "tl_latin1" tl_latin1,
       testProperty "t_utf8" t_utf8,
       testProperty "t_utf8'" t_utf8',
       testProperty "tl_utf8" tl_utf8,

tests/Tests/Regressions.hs

 
 import Control.Exception (SomeException, handle)
 import System.IO
-import Test.HUnit (assertFailure)
+import Test.HUnit (assertBool, assertFailure)
 import qualified Data.ByteString as B
+import Data.ByteString.Char8 ()
 import qualified Data.ByteString.Lazy as LB
 import qualified Data.Text as T
+import qualified Data.Text.Encoding as TE
 import qualified Data.Text.IO as T
 import qualified Data.Text.Lazy as LT
 import qualified Data.Text.Lazy.Encoding as LE
     power | maxBound == (2147483647::Int) = 28
           | otherwise                     = 60 :: Int
 
+-- Reported by John Millikin: a UTF-8 decode error handler could
+-- return a bogus substitution character, which we would write without
+-- checking.
+utf8_decode_unsafe :: IO ()
+utf8_decode_unsafe = do
+  let t = TE.decodeUtf8With (\_ _ -> Just '\xdc00') "\x80"
+  assertBool "broken error recovery shouldn't break us" (t == "\xfffd")
+
 tests :: F.Test
 tests = F.testGroup "Regressions"
     [ F.testCase "hGetContents_crash" hGetContents_crash
     , F.testCase "lazy_encode_crash" lazy_encode_crash
     , F.testCase "replicate_crash" replicate_crash
+    , F.testCase "utf8_decode_unsafe" utf8_decode_unsafe
     ]

tests/text-tests.cabal

     Data.Text.Lazy.Builder
     Data.Text.Lazy.Builder.Functions
     Data.Text.Lazy.Builder.Int
+    Data.Text.Lazy.Builder.Int.Digits
+    Data.Text.Lazy.Builder.Internal
     Data.Text.Lazy.Builder.RealFloat
     Data.Text.Lazy.Builder.RealFloat.Functions
     Data.Text.Lazy.Encoding
 name:           text
-version:        0.11.2.3
+version:        0.11.2.4
 homepage:       https://github.com/bos/text
 bug-reports:    https://github.com/bos/text/issues
 synopsis:       An efficient packed Unicode text type.
     Data.Text.Fusion.Size
     Data.Text.IO.Internal
     Data.Text.Lazy.Builder.Functions
+    Data.Text.Lazy.Builder.Int.Digits
+    Data.Text.Lazy.Builder.Internal
     Data.Text.Lazy.Builder.RealFloat.Functions
     Data.Text.Lazy.Encoding.Fusion
     Data.Text.Lazy.Fusion
     cpp-options: -DHAVE_DEEPSEQ
   else
     build-depends: extensible-exceptions
-    extensions: PatternSignatures
+    extensions: ScopedTypeVariables
 
   ghc-options: -Wall -funbox-strict-fields -O2
   if impl(ghc >= 6.8)
       cpp-options: -DINTEGER_GMP
       build-depends: integer-gmp >= 0.2
 
-
   if impl(ghc >= 6.9) && impl(ghc < 6.11)
     cpp-options: -DINTEGER_GMP
     build-depends: integer >= 0.1 && < 0.2
 
 test-suite tests
   type:           exitcode-stdio-1.0
-  hs-source-dirs: tests
+  hs-source-dirs: tests .
   main-is:        Tests.hs
-  -- c-sources:      cbits/cbits.c
+  c-sources:      cbits/cbits.c
 
   ghc-options:
     -Wall -threaded -O0 -rtsopts
   build-depends:
     HUnit >= 1.2,
     QuickCheck >= 2.4,
+    array,
     base,
     bytestring,
     deepseq,
     random,
     test-framework >= 0.4,
     test-framework-hunit >= 0.2,
-    test-framework-quickcheck2 >= 0.2,
-    text
+    test-framework-quickcheck2 >= 0.2
+
+  if impl(ghc >= 6.11)
+    if flag(integer-simple)
+      cpp-options: -DINTEGER_SIMPLE
+      build-depends: integer-simple >= 0.1 && < 0.5
+    else
+      cpp-options: -DINTEGER_GMP
+      build-depends: integer-gmp >= 0.2
+
+  if impl(ghc >= 6.9) && impl(ghc < 6.11)
+    cpp-options: -DINTEGER_GMP
+    build-depends: integer >= 0.1 && < 0.2
 
 source-repository head
   type:     git
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.