Commits

Anonymous committed edb76b3 Merge

Merge branch 'master' of https://github.com/bos/text

  • Participants
  • Parent commits 0ad243a, d67c5b8

Comments (0)

Files changed (131)

-^(?:dist|tests/benchmarks/dist|tests/tests/dist)$
-^tests/benchmarks/.*\.txt$
+^(?:dist|benchmarks/dist|tests/coverage|tests/dist)$
+^benchmarks/.*\.txt$
 ^tests/text-testdata.tar.bz2$
 ^tests/(?:\.hpc|bm|qc|qc-hpc|stdio-hpc|text/test)$
-\.(?:aux|eventlog|h[ip]|log|[oa]|orig|prof|ps|rej|swp)$
+\.(?:aux|eventlog|h[ip]|log|[oa]|orig|prof|ps|rej|swp|tix)$
 ~$
+
 syntax: glob
 .\#*
+cabal-dev
+cabal.sandbox.config
+\.cabal-sandbox
 407937739e9e764f1ae0f1f9ca454c42dca38772 0.11.1.10
 8b981edd27befa4c2dd334fcb7db22ac67e22b67 0.11.1.11
 204da16b5098531bdf858c388e2620238ef2aa5e 0.11.1.12
+6a3d847a56a69d0514a79cb212cb218271ad0917 0.11.1.13
+1d2c6fa9092c6a4000b2abdd9d01f3efcd477be5 0.11.2.0
+78219784cf3652cc662805bf2971bd62d80210a9 0.11.2.1
+4297307ebc11ad677cfba6b40319e7e5e2c0cfee 0.11.2.3
+7fa79662b66aade97fe49394977213fe6432942e 0.11.3.0
+d99cd091cdf71ce807a4255f6cc509c3154f51ea 0.11.3.1
-{-# LANGUAGE BangPatterns, CPP, Rank2Types, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- |
 -- Module      : Data.Text
--- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan,
+-- Copyright   : (c) 2009, 2010, 2011, 2012 Bryan O'Sullivan,
 --               (c) 2009 Duncan Coutts,
 --               (c) 2008, 2009 Tom Harper
 --
     , partition
 
     -- , findSubstring
-    
+
     -- * Indexing
     -- $index
     , index
     , findIndex
     , count
 
-    -- * Zipping and unzipping
+    -- * Zipping
     , zip
     , zipWith
 
     -- -* Ordered text
     -- , sort
+
+    -- * Low level operations
+    , copy
     ) where
 
 import Prelude (Char, Bool(..), Int, Maybe(..), String,
                 Eq(..), Ord(..), Ordering(..), (++),
                 Read(..), Show(..),
                 (&&), (||), (+), (-), (.), ($), ($!), (>>), (*),
-                div, maxBound, not, return, otherwise)
+                maxBound, not, return, otherwise, quot)
 #if defined(HAVE_DEEPSEQ)
 import Control.DeepSeq (NFData)
 #endif
 #endif
 import Data.Char (isSpace)
 import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf))
-#if __GLASGOW_HASKELL__ >= 612
 import Data.Data (mkNoRepType)
-#else
-import Data.Data (mkNorepType)
-#endif
 import Control.Monad (foldM)
 import qualified Data.Text.Array as A
 import qualified Data.List as L
 import qualified Data.Text.Lazy as L
 import Data.Int (Int64)
 #endif
+#if __GLASGOW_HASKELL__ >= 702
+import qualified GHC.CString as GHC
+#else
+import qualified GHC.Base as GHC
+#endif
+import GHC.Prim (Addr#)
 
 -- $strict
 --
 
 -- This instance preserves data abstraction at the cost of inefficiency.
 -- We omit reflection services for the sake of data abstraction.
--- 
+--
 -- This instance was created by copying the behavior of Data.Set and
 -- Data.Map. If you feel a mistake has been made, please feel free to
 -- submit improvements.
   gfoldl f z txt = z pack `f` (unpack txt)
   toConstr _     = P.error "Data.Text.Text.toConstr"
   gunfold _ _    = P.error "Data.Text.Text.gunfold"
-#if __GLASGOW_HASKELL__ >= 612
   dataTypeOf _   = mkNoRepType "Data.Text.Text"
-#else
-  dataTypeOf _   = mkNorepType "Data.Text.Text"
-#endif
 
 -- | /O(n)/ Compare two 'Text' values lexicographically.
 compareText :: Text -> Text -> Ordering
 -- | /O(n)/ Convert a 'String' into a 'Text'.  Subject to
 -- fusion.  Performs replacement on invalid scalar values.
 pack :: String -> Text
-pack = unstream . S.streamList . L.map safe
+pack = unstream . S.map safe . S.streamList
 {-# INLINE [1] pack #-}
 
 -- | /O(n)/ Convert a Text into a String.  Subject to fusion.
 unpack = S.unstreamList . stream
 {-# INLINE [1] unpack #-}
 
+-- | /O(n)/ Convert a literal string into a Text.  Subject to fusion.
+unpackCString# :: Addr# -> Text
+unpackCString# addr# = unstream (S.streamCString# addr#)
+{-# NOINLINE unpackCString# #-}
+
+{-# RULES "TEXT literal" forall a.
+    unstream (S.map safe (S.streamList (GHC.unpackCString# a)))
+      = unpackCString# a #-}
+
+{-# RULES "TEXT literal UTF8" forall a.
+    unstream (S.map safe (S.streamList (GHC.unpackCStringUtf8# a)))
+      = unpackCString# a #-}
+
 -- | /O(1)/ Convert a character into a Text.  Subject to fusion.
 -- Performs replacement on invalid scalar values.
 singleton :: Char -> Text
     | otherwise = replicateChar l c `append` t `append` replicateChar r c
   where len = length t
         d   = k - len
-        r   = d `div` 2
+        r   = d `quot` 2
         l   = d - r
 {-# INLINE center #-}
 
 -- @t@ repeated @n@ times.
 replicate :: Int -> Text -> Text
 replicate n t@(Text a o l)
-    | n <= 0 || l <= 0      = empty
-    | n == 1                = t
-    | isSingleton t         = replicateChar n (unsafeHead t)
-    | n <= maxBound `div` l = Text (A.run x) 0 len
-    | otherwise             = overflowError "replicate"
+    | n <= 0 || l <= 0       = empty
+    | n == 1                 = t
+    | isSingleton t          = replicateChar n (unsafeHead t)
+    | n <= maxBound `quot` l = Text (A.run x) 0 len
+    | otherwise              = overflowError "replicate"
   where
     len = l * n
     x = do
     where go !i | i >= len || q c       = i
                 | otherwise             = go (i+d)
                 where Iter c d          = iter t i
-    
+
 -- | /O(n)/ Group characters in a string by equality.
 group :: Text -> [Text]
 group = groupBy (==)
 -- > splitOn "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
 -- > splitOn "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
 -- > splitOn "x"    "x"                == ["",""]
--- 
+--
 -- and
 --
 -- > intercalate s . splitOn s         == id
 {-# RULES
 "TEXT isPrefixOf -> fused" [~1] forall s t.
     isPrefixOf s t = S.isPrefixOf (stream s) (stream t)
-"TEXT isPrefixOf -> unfused" [1] forall s t.
-    S.isPrefixOf (stream s) (stream t) = isPrefixOf s t
   #-}
 
 -- | /O(n)/ The 'isSuffixOf' function takes two 'Text's and returns
 
 overflowError :: String -> a
 overflowError fun = P.error $ "Data.Text." ++ fun ++ ": size overflow"
+
+-- | /O(n)/ Make a distinct copy of the given string, sharing no
+-- storage with the original string.
+--
+-- As an example, suppose you read a large string, of which you need
+-- only a small portion.  If you do not use 'copy', the entire original
+-- array will be kept alive in memory by the smaller string. Making a
+-- copy \"breaks the link\" to the original array, allowing it to be
+-- garbage collected if there are no other live references to it.
+copy :: Text -> Text
+copy (Text arr off len) = Text (A.run go) 0 len
+  where
+    go = do
+      marr <- A.new len
+      A.copyI marr 0 arr off len
+      return marr

Data/Text/Array.hs

 -- | Create an uninitialized mutable array.
 new :: forall s. Int -> ST s (MArray s)
 new n
-  | n < 0 || n .&. highBit /= 0 = error $ "Data.Text.Array.new: size overflow"
+  | n < 0 || n .&. highBit /= 0 = array_size_error
   | otherwise = ST $ \s1# ->
        case newByteArray# len# s1# of
          (# s2#, marr# #) -> (# s2#, MArray marr#
         highBit    = maxBound `xor` (maxBound `shiftR` 1)
 {-# INLINE new #-}
 
+array_size_error :: a
+array_size_error = error "Data.Text.Array.new: size overflow"
+
 -- | Freeze a mutable array. Do not mutate the 'MArray' afterwards!
 unsafeFreeze :: MArray s -> ST s Array
 unsafeFreeze MArray{..} = ST $ \s# ->
                  (marr,b) <- k
                  arr <- unsafeFreeze marr
                  return (arr,b))
+{-# INLINE run2 #-}
 
 -- | Copy some elements of a mutable array.
 copyM :: MArray s               -- ^ Destination

Data/Text/Encoding.hs

-{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, MagicHash,
+{-# LANGUAGE BangPatterns, CPP, ForeignFunctionInterface, GeneralizedNewtypeDeriving, MagicHash,
     UnliftedFFITypes #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Encoding
 -- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan,
     -- * Decoding ByteStrings to Text
     -- $strict
       decodeASCII
+    , decodeLatin1
     , decodeUtf8
     , decodeUtf16LE
     , decodeUtf16BE
     , decodeUtf32LEWith
     , decodeUtf32BEWith
 
+    -- ** Stream oriented decoding
+    -- $stream
+    , streamDecodeUtf8
+    , streamDecodeUtf8With
+    , Decoding(..)
+
     -- * Encoding Text to ByteStrings
     , encodeUtf8
     , encodeUtf16LE
 #else
 import Control.Monad.ST (unsafeIOToST, unsafeSTToIO)
 #endif
+import Control.Monad.ST (runST)
 import Data.Bits ((.&.))
 import Data.ByteString as B
 import Data.ByteString.Internal as B
+import Data.Text ()
 import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
-import Data.Text.Internal (Text(..), textP)
+import Data.Text.Internal (Text(..), safe, textP)
+import Data.Text.Private (runText)
 import Data.Text.UnsafeChar (ord, unsafeWrite)
 import Data.Text.UnsafeShift (shiftL, shiftR)
-import Data.Word (Word8)
+import Data.Word (Word8, Word32)
 import Foreign.C.Types (CSize)
 import Foreign.ForeignPtr (withForeignPtr)
 import Foreign.Marshal.Utils (with)
-import Foreign.Ptr (Ptr, minusPtr, plusPtr)
-import Foreign.Storable (peek, poke)
+import Foreign.Ptr (Ptr, minusPtr, nullPtr, plusPtr)
+import Foreign.Storable (Storable, peek, poke)
 import GHC.Base (MutableByteArray#)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.Text.Array as A
 import qualified Data.Text.Encoding.Fusion as E
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Fusion as F
+import Data.Text.Unsafe (unsafeDupablePerformIO)
+
+#include "text_cbits.h"
 
 -- $strict
 --
 -- | /Deprecated/.  Decode a 'ByteString' containing 7-bit ASCII
 -- encoded text.
 --
--- This function is deprecated.  Use 'decodeUtf8' instead.
+-- This function is deprecated.  Use 'decodeLatin1' instead.
 decodeASCII :: ByteString -> Text
 decodeASCII = decodeUtf8
 {-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
 
+-- | Decode a 'ByteString' containing Latin-1 (aka ISO-8859-1) encoded text.
+--
+-- 'decodeLatin1' is semantically equivalent to
+--  @Data.Text.pack . Data.ByteString.Char8.unpack@
+decodeLatin1 :: ByteString -> Text
+decodeLatin1 (PS fp off len) = textP a 0 len
+ where
+  a = A.run (A.new len >>= unsafeIOToST . go)
+  go dest = withForeignPtr fp $ \ptr -> do
+    c_decode_latin1 (A.maBA dest) (ptr `plusPtr` off) (ptr `plusPtr` (off+len))
+    return dest
+
 -- | Decode a 'ByteString' containing UTF-8 encoded text.
 decodeUtf8With :: OnDecodeError -> ByteString -> Text
-decodeUtf8With onErr (PS fp off len) = textP (fst a) 0 (snd a)
+decodeUtf8With onErr (PS fp off len) = runText $ \done -> do
+  let go dest = withForeignPtr fp $ \ptr ->
+        with (0::CSize) $ \destOffPtr -> do
+          let end = ptr `plusPtr` (off + len)
+              loop curPtr = do
+                curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end
+                if curPtr' == end
+                  then do
+                    n <- peek destOffPtr
+                    unsafeSTToIO (done dest (fromIntegral n))
+                  else do
+                    x <- peek curPtr'
+                    case onErr desc (Just x) of
+                      Nothing -> loop $ curPtr' `plusPtr` 1
+                      Just c -> do
+                        destOff <- peek destOffPtr
+                        w <- unsafeSTToIO $
+                             unsafeWrite dest (fromIntegral destOff) (safe c)
+                        poke destOffPtr (destOff + fromIntegral w)
+                        loop $ curPtr' `plusPtr` 1
+          loop (ptr `plusPtr` off)
+  (unsafeIOToST . go) =<< A.new len
  where
-  a = A.run2 (A.new len >>= unsafeIOToST . go)
   desc = "Data.Text.Encoding.decodeUtf8: Invalid UTF-8 stream"
-  go dest = withForeignPtr fp $ \ptr ->
-    with (0::CSize) $ \destOffPtr -> do
-      let end = ptr `plusPtr` (off + len)
-          loop curPtr = do
-            curPtr' <- c_decode_utf8 (A.maBA dest) destOffPtr curPtr end
-            if curPtr' == end
-              then do
-                n <- peek destOffPtr
-                return (dest,fromIntegral n)
-              else do
-                x <- peek curPtr'
-                case onErr desc (Just x) of
-                  Nothing -> loop $ curPtr' `plusPtr` 1
-                  Just c -> do
-                    destOff <- peek destOffPtr
-                    w <- unsafeSTToIO $
-                         unsafeWrite dest (fromIntegral destOff) c
-                    poke destOffPtr (destOff + fromIntegral w)
-                    loop $ curPtr' `plusPtr` 1
-      loop (ptr `plusPtr` off)
 {- INLINE[0] decodeUtf8With #-}
 
+-- $stream
+--
+-- The 'streamDecodeUtf8' and 'streamDecodeUtf8With' functions accept
+-- a 'ByteString' that represents a possibly incomplete input (e.g. a
+-- packet from a network stream) that may not end on a UTF-8 boundary.
+--
+-- The first element of the result is the maximal chunk of 'Text' that
+-- can be decoded from the given input. The second is a function which
+-- accepts another 'ByteString'. That string will be assumed to
+-- directly follow the string that was passed as input to the original
+-- function, and it will in turn be decoded.
+--
+-- To help understand the use of these functions, consider the Unicode
+-- string @\"hi &#9731;\"@. If encoded as UTF-8, this becomes @\"hi
+-- \\xe2\\x98\\x83\"@; the final @\'&#9731;\'@ is encoded as 3 bytes.
+--
+-- Now suppose that we receive this encoded string as 3 packets that
+-- are split up on untidy boundaries: @[\"hi \\xe2\", \"\\x98\",
+-- \"\\x83\"]@. We cannot decode the entire Unicode string until we
+-- have received all three packets, but we would like to make progress
+-- as we receive each one.
+--
+-- @
+-- let 'Some' t0 f0 = 'streamDecodeUtf8' \"hi \\xe2\"
+-- t0 == \"hi \" :: 'Text'
+-- @
+--
+-- We use the continuation @f0@ to decode our second packet.
+--
+-- @
+-- let 'Some' t1 f1 = f0 \"\\x98\"
+-- t1 == \"\"
+-- @
+--
+-- We could not give @f0@ enough input to decode anything, so it
+-- returned an empty string. Once we feed our second continuation @f1@
+-- the last byte of input, it will make progress.
+--
+-- @
+-- let 'Some' t2 f2 = f1 \"\\x83\"
+-- t2 == \"&#9731;\"
+-- @
+--
+-- If given invalid input, an exception will be thrown by the function
+-- or continuation where it is encountered.
+
+-- | A stream oriented decoding result.
+data Decoding = Some Text ByteString (ByteString -> Decoding)
+
+instance Show Decoding where
+    showsPrec d (Some t bs _) = showParen (d > prec) $
+                                showString "Some " . showsPrec prec' t .
+                                showChar ' ' . showsPrec prec' bs .
+                                showString " _"
+      where prec = 10; prec' = prec + 1
+
+newtype CodePoint = CodePoint Word32 deriving (Eq, Show, Num, Storable)
+newtype DecoderState = DecoderState Word32 deriving (Eq, Show, Num, Storable)
+
+-- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8
+-- encoded text that is known to be valid.
+--
+-- If the input contains any invalid UTF-8 data, an exception will be
+-- thrown (either by this function or a continuation) that cannot be
+-- caught in pure code.  For more control over the handling of invalid
+-- data, use 'streamDecodeUtf8With'.
+streamDecodeUtf8 :: ByteString -> Decoding
+streamDecodeUtf8 = streamDecodeUtf8With strictDecode
+
+-- | Decode, in a stream oriented way, a 'ByteString' containing UTF-8
+-- encoded text.
+streamDecodeUtf8With :: OnDecodeError -> ByteString -> Decoding
+streamDecodeUtf8With onErr = decodeChunk 0 0
+ where
+  -- We create a slightly larger than necessary buffer to accommodate a
+  -- potential surrogate pair started in the last buffer
+  decodeChunk :: CodePoint -> DecoderState -> ByteString -> Decoding
+  decodeChunk codepoint0 state0 bs@(PS fp off len) =
+    runST $ (unsafeIOToST . decodeChunkToBuffer) =<< A.new (len+1)
+   where
+    decodeChunkToBuffer :: A.MArray s -> IO Decoding
+    decodeChunkToBuffer dest = withForeignPtr fp $ \ptr ->
+      with (0::CSize) $ \destOffPtr ->
+      with codepoint0 $ \codepointPtr ->
+      with state0 $ \statePtr ->
+      with nullPtr $ \curPtrPtr ->
+        let end = ptr `plusPtr` (off + len)
+            loop curPtr = do
+              poke curPtrPtr curPtr
+              curPtr' <- c_decode_utf8_with_state (A.maBA dest) destOffPtr
+                         curPtrPtr end codepointPtr statePtr
+              state <- peek statePtr
+              case state of
+                UTF8_REJECT -> do
+                  -- We encountered an encoding error
+                  x <- peek curPtr'
+                  case onErr desc (Just x) of
+                    Nothing -> loop $ curPtr' `plusPtr` 1
+                    Just c -> do
+                      destOff <- peek destOffPtr
+                      w <- unsafeSTToIO $
+                           unsafeWrite dest (fromIntegral destOff) (safe c)
+                      poke destOffPtr (destOff + fromIntegral w)
+                      poke statePtr 0
+                      loop $ curPtr' `plusPtr` 1
+
+                _ -> do
+                  -- We encountered the end of the buffer while decoding
+                  n <- peek destOffPtr
+                  codepoint <- peek codepointPtr
+                  chunkText <- unsafeSTToIO $ do
+                      arr <- A.unsafeFreeze dest
+                      return $! textP arr 0 (fromIntegral n)
+                  lastPtr <- peek curPtrPtr
+                  let left = lastPtr `minusPtr` curPtr
+                  return $ Some chunkText (B.drop left bs)
+                           (decodeChunk codepoint state)
+        in loop (ptr `plusPtr` off)
+  desc = "Data.Text.Encoding.streamDecodeUtf8With: Invalid UTF-8 stream"
+
 -- | Decode a 'ByteString' containing UTF-8 encoded text that is known
 -- to be valid.
 --
 {-# RULES "STREAM stream/decodeUtf8 fusion" [1]
     forall bs. F.stream (decodeUtf8 bs) = E.streamUtf8 strictDecode bs #-}
 
--- | Decode a 'ByteString' containing UTF-8 encoded text..
+-- | Decode a 'ByteString' containing UTF-8 encoded text.
 --
 -- If the input contains any invalid UTF-8 data, the relevant
 -- exception will be returned, otherwise the decoded text.
 decodeUtf8' :: ByteString -> Either UnicodeException Text
-decodeUtf8' = unsafePerformIO . try . evaluate . decodeUtf8With strictDecode
+decodeUtf8' = unsafeDupablePerformIO . try . evaluate . decodeUtf8With strictDecode
 {-# INLINE decodeUtf8' #-}
 
 -- | Encode text using UTF-8 encoding.
 encodeUtf8 :: Text -> ByteString
-encodeUtf8 (Text arr off len) = unsafePerformIO $ do
+encodeUtf8 (Text arr off len) = unsafeDupablePerformIO $ do
   let size0 = max len 4
   mallocByteString size0 >>= start size0 off 0
  where
 foreign import ccall unsafe "_hs_text_decode_utf8" c_decode_utf8
     :: MutableByteArray# s -> Ptr CSize
     -> Ptr Word8 -> Ptr Word8 -> IO (Ptr Word8)
+
+foreign import ccall unsafe "_hs_text_decode_utf8_state" c_decode_utf8_with_state
+    :: MutableByteArray# s -> Ptr CSize
+    -> Ptr (Ptr Word8) -> Ptr Word8
+    -> Ptr CodePoint -> Ptr DecoderState -> IO (Ptr Word8)
+
+foreign import ccall unsafe "_hs_text_decode_latin1" c_decode_latin1
+    :: MutableByteArray# s -> Ptr Word8 -> Ptr Word8 -> IO ()

Data/Text/Encoding/Error.hs

 {-# LANGUAGE CPP, DeriveDataTypeable #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Encoding.Error
 -- Copyright   : (c) Bryan O'Sullivan 2009
     , replace
     ) where
 
-#if __GLASGOW_HASKELL__ >= 610
+import Control.DeepSeq (NFData (..))
 import Control.Exception (Exception, throw)
-#else
-import Control.Exception.Extensible (Exception, throw)
-#endif
 import Data.Typeable (Typeable)
 import Data.Word (Word8)
 import Numeric (showHex)
     = "Cannot encode character '\\x" ++ showHex (fromEnum c) ("': " ++ desc)
 showUnicodeException (EncodeError desc Nothing)
     = "Cannot encode input: " ++ desc
-                     
+
 instance Show UnicodeException where
     show = showUnicodeException
 
 instance Exception UnicodeException
 
+instance NFData UnicodeException where
+    rnf (DecodeError desc w) = rnf desc `seq` rnf w `seq` ()
+    rnf (EncodeError desc c) = rnf desc `seq` rnf c `seq` ()
+
 -- | Throw a 'UnicodeException' if decoding fails.
 strictDecode :: OnDecodeError
 strictDecode desc c = throw (DecodeError desc c)

Data/Text/Encoding/Fusion.hs

 import Data.Word (Word8, Word16, Word32)
 import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
 import Foreign.Storable (pokeByteOff)
-import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Unsafe as B
 import qualified Data.Text.Encoding.Utf8 as U8
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Encoding.Utf32 as U32
+import Data.Text.Unsafe (unsafeDupablePerformIO)
 
 streamASCII :: ByteString -> Stream Char
 streamASCII bs = Stream next 0 (maxSize l)
 
 -- | /O(n)/ Convert a 'Stream' 'Word8' to a 'ByteString'.
 unstream :: Stream Word8 -> ByteString
-unstream (Stream next s0 len) = unsafePerformIO $ do
+unstream (Stream next s0 len) = unsafeDupablePerformIO $ do
     let mlen = upperBound 4 len
     mallocByteString mlen >>= loop mlen 0 s0
     where

Data/Text/Encoding/Fusion/Common.hs

     -- * Restreaming
     -- Restreaming is the act of converting from one 'Stream'
     -- representation to another.
-      restreamUtf8
-    , restreamUtf16LE
+      restreamUtf16LE
     , restreamUtf16BE
     , restreamUtf32LE
     , restreamUtf32BE
 import Data.Text.UnsafeChar (ord)
 import Data.Text.UnsafeShift (shiftR)
 import Data.Word (Word8)
-import qualified Data.Text.Encoding.Utf8 as U8
-
--- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
-restreamUtf8 :: Stream Char -> Stream Word8
-restreamUtf8 (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
-  where
-    next (RS0 s) = case next0 s of
-        Done              -> Done
-        Skip s'           -> Skip (RS0 s')
-        Yield x s'
-            | n <= 0x7F   -> Yield c  (RS0 s')
-            | n <= 0x07FF -> Yield a2 (RS1 s' b2)
-            | n <= 0xFFFF -> Yield a3 (RS2 s' b3 c3)
-            | otherwise   -> Yield a4 (RS3 s' b4 c4 d4)
-          where
-            n  = ord x
-            c  = fromIntegral n
-            (a2,b2) = U8.ord2 x
-            (a3,b3,c3) = U8.ord3 x
-            (a4,b4,c4,d4) = U8.ord4 x
-    next (RS1 s x2)       = Yield x2 (RS0 s)
-    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
-    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
-    {-# INLINE next #-}
-{-# INLINE restreamUtf8 #-}
 
 restreamUtf16BE :: Stream Char -> Stream Word8
 restreamUtf16BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)

Data/Text/Encoding/Utf8.hs

 validate4 :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
 {-# INLINE validate4 #-}
 validate4 x1 x2 x3 x4 = validate4_1 || validate4_2 || validate4_3
-  where 
+  where
     validate4_1 = x1 == 0xF0 &&
                   between x2 0x90 0xBF &&
                   between x3 0x80 0xBF &&

Data/Text/Foreign.hs

     , fromPtr
     , useAsPtr
     , asForeignPtr
+    -- ** Encoding as UTF-8
+    , peekCStringLen
+    , withCStringLen
     -- * Unsafe conversion code
     , lengthWord16
     , unsafeCopyToPtr
 #else
 import Control.Monad.ST (unsafeIOToST)
 #endif
+import Data.ByteString.Unsafe (unsafePackCStringLen, unsafeUseAsCStringLen)
+import Data.Text.Encoding (decodeUtf8, encodeUtf8)
 import Data.Text.Internal (Text(..), empty)
 import Data.Text.Unsafe (lengthWord16)
-import qualified Data.Text.Array as A
 import Data.Word (Word16)
+import Foreign.C.String (CStringLen)
+import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
 import Foreign.Marshal.Alloc (allocaBytes)
 import Foreign.Ptr (Ptr, castPtr, plusPtr)
-import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
 import Foreign.Storable (peek, poke)
+import qualified Data.Text.Array as A
 
 -- $interop
 --
   fp <- mallocForeignPtrArray len
   withForeignPtr fp $ unsafeCopyToPtr t
   return (fp, I16 len)
+
+-- | /O(n)/ Decode a C string with explicit length, which is assumed
+-- to have been encoded as UTF-8. If decoding fails, a
+-- 'UnicodeException' is thrown.
+peekCStringLen :: CStringLen -> IO Text
+peekCStringLen cs = do
+  bs <- unsafePackCStringLen cs
+  return $! decodeUtf8 bs
+
+-- | Marshal a 'Text' into a C string encoded as UTF-8 in temporary
+-- storage, with explicit length information. The encoded string may
+-- contain NUL bytes, and is not followed by a trailing NUL byte.
+--
+-- The temporary storage is freed when the subcomputation terminates
+-- (either normally or via an exception), so the pointer to the
+-- temporary storage must /not/ be used after this function returns.
+withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
+withCStringLen t act = unsafeUseAsCStringLen (encodeUtf8 t) act

Data/Text/Fusion.hs

                 fromIntegral, otherwise)
 import Data.Bits ((.&.))
 import Data.Text.Internal (Text(..))
+import Data.Text.Private (runText)
 import Data.Text.UnsafeChar (ord, unsafeChr, unsafeWrite)
 import Data.Text.UnsafeShift (shiftL, shiftR)
 import qualified Data.Text.Array as A
 import Data.Text.Fusion.Size
 import qualified Data.Text.Internal as I
 import qualified Data.Text.Encoding.Utf16 as U16
-import qualified Prelude as P
 
 default(Int)
 
 
 -- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
 unstream :: Stream Char -> Text
-unstream (Stream next0 s0 len) = I.textP (P.fst a) 0 (P.snd a)
-  where
-    a = A.run2 (A.new mlen >>= \arr -> outer arr mlen s0 0)
-      where mlen = upperBound 4 len
-    outer arr top = loop
-      where
+unstream (Stream next0 s0 len) = runText $ \done -> do
+  let mlen = upperBound 4 len
+  arr0 <- A.new mlen
+  let outer arr top = loop
+       where
         loop !s !i =
             case next0 s of
-              Done          -> return (arr, i)
+              Done          -> done arr i
               Skip s'       -> loop s' i
               Yield x s'
                 | j >= top  -> {-# SCC "unstream/resize" #-} do
                                   loop s' (i+d)
                 where j | ord x < 0x10000 = i
                         | otherwise       = i + 1
+  outer arr0 mlen s0 0
 {-# INLINE [0] unstream #-}
 {-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
 
                                arr' <- A.new top'
                                A.copyM arr' 0 arr 0 top
                                outer arr' top' z s i
-                | otherwise -> do let (z',c) = f z x
-                                  d <- unsafeWrite arr i c
+                | otherwise -> do d <- unsafeWrite arr i c
                                   loop z' s' (i+d)
-                where j | ord x < 0x10000 = i
+                where (z',c) = f z x
+                      j | ord c < 0x10000 = i
                         | otherwise       = i + 1
 {-# INLINE [0] mapAccumL #-}

Data/Text/Fusion/Common.hs

-{-# LANGUAGE BangPatterns, Rank2Types #-}
+{-# LANGUAGE BangPatterns, MagicHash, Rank2Types #-}
 -- |
 -- Module      : Data.Text.Fusion.Common
--- Copyright   : (c) Bryan O'Sullivan 2009
+-- Copyright   : (c) Bryan O'Sullivan 2009, 2012
 --
 -- License     : BSD-style
 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
       singleton
     , streamList
     , unstreamList
+    , streamCString#
 
     -- * Basic interface
     , cons
                 (&&), fromIntegral, otherwise)
 import qualified Data.List as L
 import qualified Prelude as P
+import Data.Bits (shiftL)
 import Data.Int (Int64)
 import Data.Text.Fusion.Internal
 import Data.Text.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping)
 import Data.Text.Fusion.Size
+import GHC.Prim (Addr#, chr#, indexCharOffAddr#, ord#)
+import GHC.Types (Char(..), Int(..))
 
 singleton :: Char -> Stream Char
 singleton c = Stream next False 1
 
 {-# RULES "STREAM streamList/unstreamList fusion" forall s. streamList (unstreamList s) = s #-}
 
+-- | Stream the UTF-8-like packed encoding used by GHC to represent
+-- constant strings in generated code.
+--
+-- This encoding uses the byte sequence "\xc0\x80" to represent NUL,
+-- and the string is NUL-terminated.
+streamCString# :: Addr# -> Stream Char
+streamCString# addr = Stream step 0 unknownSize
+  where
+    step !i
+        | b == 0    = Done
+        | b <= 0x7f = Yield (C# b#) (i+1)
+        | b <= 0xdf = let !c = chr $ ((b-0xc0) `shiftL` 6) + next 1
+                      in Yield c (i+2)
+        | b <= 0xef = let !c = chr $ ((b-0xe0) `shiftL` 12) +
+                                      (next 1  `shiftL` 6) +
+                                       next 2
+                      in Yield c (i+3)
+        | otherwise = let !c = chr $ ((b-0xf0) `shiftL` 18) +
+                                      (next 1  `shiftL` 12) +
+                                      (next 2  `shiftL` 6) +
+                                       next 3
+                      in Yield c (i+4)
+      where b      = I# (ord# b#)
+            next n = I# (ord# (at# (i+n))) - 0x80
+            !b#    = at# i
+    at# (I# i#) = indexCharOffAddr# addr i#
+    chr (I# i#) = C# (chr# i#)
+{-# INLINE [0] streamCString# #-}
+
 -- ----------------------------------------------------------------------------
 -- * Basic stream functions
 
 
 -- | /O(n)/ Adds a character to the front of a Stream Char.
 cons :: Char -> Stream Char -> Stream Char
-cons w (Stream next0 s0 len) = Stream next (C1 s0) (len+1)
+cons !w (Stream next0 s0 len) = Stream next (C1 s0) (len+1)
     where
       next (C1 s) = Yield w (C0 s)
       next (C0 s) = case next0 s of
       loop_head !s = case next s of
                       Yield x _ -> x
                       Skip s'   -> loop_head s'
-                      Done      -> streamError "head" "Empty stream"
+                      Done      -> head_empty
 {-# INLINE [0] head #-}
 
+head_empty :: a
+head_empty = streamError "head" "Empty stream"
+{-# NOINLINE head_empty #-}
+
 -- | /O(1)/ Returns the first character and remainder of a 'Stream
 -- Char', or 'Nothing' if empty.  Subject to array fusion.
 uncons :: Stream Char -> Maybe (Char, Stream Char)
 -- of 'lengthI', but can short circuit if the count of characters is
 -- greater than the number, and hence be more efficient.
 compareLengthI :: Integral a => Stream Char -> a -> Ordering
-compareLengthI (Stream next s0 len) n = 
+compareLengthI (Stream next s0 len) n =
     case exactly len of
       Nothing -> loop_cmp 0 s0
       Just i  -> compare (fromIntegral i) n
       loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 &&
                                            loop (next1 s1') (next2 s2')
 {-# INLINE [0] isPrefixOf #-}
-{-# SPECIALISE isPrefixOf :: Stream Char -> Stream Char -> Bool #-}
 
 -- ----------------------------------------------------------------------------
 -- * Searching

Data/Text/Fusion/Internal.hs

       loop (Yield x1 s1') (Yield x2 s2') = x1 == x2 &&
                                            loop (next1 s1') (next2 s2')
 {-# INLINE [0] eq #-}
-{-# SPECIALISE eq :: Stream Char -> Stream Char -> Bool #-}
 
 cmp :: (Ord a) => Stream a -> Stream a -> Ordering
 cmp (Stream next1 s1 _) (Stream next2 s2 _) = loop (next1 s1) (next2 s2)
             EQ    -> loop (next1 s1') (next2 s2')
             other -> other
 {-# INLINE [0] cmp #-}
-{-# SPECIALISE cmp :: Stream Char -> Stream Char -> Ordering #-}
 
 -- | The empty stream.
 empty :: Stream a

Data/Text/Fusion/Size.hs

 
 mul :: Int -> Int -> Int
 mul m n
-    | m <= maxBound `div` n = m * n
-    | otherwise             = overflowError
+    | m <= maxBound `quot` n = m * n
+    | otherwise              = overflowError
 {-# INLINE mul #-}
 
 mulSize :: Size -> Size -> Size
 {-# LANGUAGE BangPatterns, CPP, RecordWildCards, ScopedTypeVariables #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.IO
 -- Copyright   : (c) 2009, 2010 Bryan O'Sullivan,
 module Data.Text.IO
     (
     -- * Performance
-    -- $performance 
+    -- $performance
 
     -- * Locale support
     -- $locale
     , appendFile
     -- * Operations on handles
     , hGetContents
+    , hGetChunk
     , hGetLine
     , hPutStr
     , hPutStrLn
     ) where
 
 import Data.Text (Text)
-import Prelude hiding (appendFile, catch, getContents, getLine, interact,
+import Prelude hiding (appendFile, getContents, getLine, interact,
                        putStr, putStrLn, readFile, writeFile)
 import System.IO (Handle, IOMode(..), hPutChar, openFile, stdin, stdout,
                   withFile)
-#if __GLASGOW_HASKELL__ <= 610
-import qualified Data.ByteString.Char8 as B
-import Data.Text.Encoding (decodeUtf8, encodeUtf8)
-#else
-import Control.Exception (catch, throwIO)
+import qualified Control.Exception as E
 import Control.Monad (liftM2, when)
 import Data.IORef (readIORef, writeIORef)
 import qualified Data.Text as T
                             HandleType(..), Newline(..))
 import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell)
 import System.IO.Error (isEOFError)
-#endif
 
 -- $performance
 -- #performance#
 appendFile :: FilePath -> Text -> IO ()
 appendFile p = withFile p AppendMode . flip hPutStr
 
+catchError :: String -> Handle -> Handle__ -> IOError -> IO Text
+catchError caller h Handle__{..} err
+    | isEOFError err = do
+        buf <- readIORef haCharBuffer
+        return $ if isEmptyBuffer buf
+                 then T.empty
+                 else T.singleton '\r'
+    | otherwise = E.throwIO (augmentIOError err caller h)
+
+-- | /Experimental./ Read a single chunk of strict text from a
+-- 'Handle'. The size of the chunk depends on the amount of input
+-- currently buffered.
+--
+-- This function blocks only if there is no data available, and EOF
+-- has not yet been reached. Once EOF is reached, this function
+-- returns an empty string instead of throwing an exception.
+hGetChunk :: Handle -> IO Text
+hGetChunk h = wantReadableHandle "hGetChunk" h readSingleChunk
+ where
+  readSingleChunk hh@Handle__{..} = do
+    buf <- readIORef haCharBuffer
+    t <- readChunk hh buf `E.catch` catchError "hGetChunk" h hh
+    return (hh, t)
+
 -- | Read the remaining contents of a 'Handle' as a string.  The
 -- 'Handle' is closed once the contents have been read, or if an
 -- exception is thrown.
 -- result to construct its result.  For files more than a half of
 -- available RAM in size, this may result in memory exhaustion.
 hGetContents :: Handle -> IO Text
-#if __GLASGOW_HASKELL__ <= 610
-hGetContents = fmap decodeUtf8 . B.hGetContents
-#else
 hGetContents h = do
   chooseGoodBuffering h
   wantReadableHandle "hGetContents" h readAll
  where
   readAll hh@Handle__{..} = do
-    let catchError e
-          | isEOFError e = do
-              buf <- readIORef haCharBuffer
-              return $ if isEmptyBuffer buf
-                       then T.empty
-                       else T.singleton '\r'
-          | otherwise = throwIO (augmentIOError e "hGetContents" h)
-        readChunks = do
+    let readChunks = do
           buf <- readIORef haCharBuffer
-          t <- readChunk hh buf `catch` catchError
+          t <- readChunk hh buf `E.catch` catchError "hGetContents" h hh
           if T.null t
             then return [t]
             else (t:) `fmap` readChunks
     ts <- readChunks
     (hh', _) <- hClose_help hh
     return (hh'{haType=ClosedHandle}, T.concat ts)
-  
+
 -- | Use a more efficient buffer size if we're reading in
 -- block-buffered mode with the default buffer size.  When we can
 -- determine the size of the handle we're reading, set the buffer size
   bufMode <- hGetBuffering h
   case bufMode of
     BlockBuffering Nothing -> do
-      d <- catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) ->
+      d <- E.catch (liftM2 (-) (hFileSize h) (hTell h)) $ \(e::IOException) ->
            if ioe_type e == InappropriateType
            then return 16384 -- faster than the 2KB default
-           else throwIO e
+           else E.throwIO e
       when (d > 0) . hSetBuffering h . BlockBuffering . Just . fromIntegral $ d
     _ -> return ()
-#endif
 
 -- | Read a single line from a handle.
 hGetLine :: Handle -> IO Text
-#if __GLASGOW_HASKELL__ <= 610
-hGetLine = fmap decodeUtf8 . B.hGetLine
-#else
 hGetLine = hGetLineWith T.concat
-#endif
 
 -- | Write a string to a handle.
 hPutStr :: Handle -> Text -> IO ()
-#if __GLASGOW_HASKELL__ <= 610
-hPutStr h = B.hPutStr h . encodeUtf8
-#else
 -- This function is lifted almost verbatim from GHC.IO.Handle.Text.
 hPutStr h t = do
-  (buffer_mode, nl) <- 
+  (buffer_mode, nl) <-
        wantWritableHandle "hPutStr" h $ \h_ -> do
                      bmode <- getSpareBuffer h_
                      return (bmode, haOutputNL h_)
 
 -- This function is completely lifted from GHC.IO.Handle.Text.
 getSpareBuffer :: Handle__ -> IO (BufferMode, CharBuffer)
-getSpareBuffer Handle__{haCharBuffer=ref, 
+getSpareBuffer Handle__{haCharBuffer=ref,
                         haBuffers=spare_ref,
                         haBufferMode=mode}
  = do
 -- This function is completely lifted from GHC.IO.Handle.Text.
 commitBuffer :: Handle -> RawCharBuffer -> Int -> Int -> Bool -> Bool
              -> IO CharBuffer
-commitBuffer hdl !raw !sz !count flush release = 
+commitBuffer hdl !raw !sz !count flush release =
   wantWritableHandle "commitAndReleaseBuffer" hdl $
      commitBuffer' raw sz count flush release
 {-# INLINE commitBuffer #-}
-#endif
 
 -- | Write a string to a handle, followed by a newline.
 hPutStrLn :: Handle -> Text -> IO ()

Data/Text/IO/Internal.hs

 
 module Data.Text.IO.Internal
     (
-#if __GLASGOW_HASKELL__ >= 612
       hGetLineWith
     , readChunk
-#endif
     ) where
 
-#if __GLASGOW_HASKELL__ >= 612
-import Control.Exception (catch)
+import qualified Control.Exception as E
 import Data.IORef (readIORef, writeIORef)
 import Data.Text (Text)
 import Data.Text.Fusion (unstream)
                       withRawBuffer, writeCharBuf)
 import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_)
 import GHC.IO.Handle.Types (Handle__(..), Newline(..))
-import Prelude hiding (catch)
 import System.IO (Handle)
 import System.IO.Error (isEOFError)
 import qualified Data.Text as T
 -- This function is lifted almost verbatim from GHC.IO.Handle.Text.
 maybeFillReadBuffer :: Handle__ -> CharBuffer -> IO (Maybe CharBuffer)
 maybeFillReadBuffer handle_ buf
-  = catch (Just `fmap` getSomeCharacters handle_ buf) $ \e ->
-      if isEOFError e 
-      then return Nothing 
+  = E.catch (Just `fmap` getSomeCharacters handle_ buf) $ \e ->
+      if isEOFError e
+      then return Nothing
       else ioError e
 
 unpack :: RawCharBuffer -> Int -> Int -> IO Text
 
 sizeError :: String -> a
 sizeError loc = error $ "Data.Text.IO." ++ loc ++ ": bad internal buffer size"
-#endif

Data/Text/Internal.hs

 module Data.Text.Internal
     (
     -- * Types
+    -- $internals
       Text(..)
     -- * Construction
     , text
 
 -- | A space efficient, packed, unboxed Unicode text type.
 data Text = Text
-    {-# UNPACK #-} !A.Array          -- payload
-    {-# UNPACK #-} !Int              -- offset
-    {-# UNPACK #-} !Int              -- length
+    {-# UNPACK #-} !A.Array          -- payload (Word16 elements)
+    {-# UNPACK #-} !Int              -- offset (units of Word16, not Char)
+    {-# UNPACK #-} !Int              -- length (units of Word16, not Char)
     deriving (Typeable)
 
 -- | Smart constructor.
 -- scalar values, but are unfortunately admitted as valid 'Char'
 -- values by Haskell.  They cannot be represented in a 'Text'.  This
 -- function remaps those code points to the Unicode replacement
--- character \"&#xfffd;\", and leaves other code points unchanged.
+-- character (U+FFFD, \'&#xfffd;\'), and leaves other code points
+-- unchanged.
 safe :: Char -> Char
 safe c
     | ord c .&. 0x1ff800 /= 0xd800 = c
 firstf :: (a -> c) -> Maybe (a,b) -> Maybe (c,b)
 firstf f (Just (a, b)) = Just (f a, b)
 firstf _  Nothing      = Nothing
+
+-- $internals
+--
+-- Internally, the 'Text' type is represented as an array of 'Word16'
+-- UTF-16 code units. The offset and length fields in the constructor
+-- are in these units, /not/ units of 'Char'.
+--
+-- Invariants that all functions must maintain:
+--
+-- * Since the 'Text' type uses UTF-16 internally, it cannot represent
+--   characters in the reserved surrogate code point range U+D800 to
+--   U+DFFF. To maintain this invariant, the 'safe' function maps
+--   'Char' values in this range to the replacement character (U+FFFD,
+--   \'&#xfffd;\').
+--
+-- * A leading (or \"high\") surrogate code unit (0xD800–0xDBFF) must
+--   always be followed by a trailing (or \"low\") surrogate code unit
+--   (0xDC00-0xDFFF). A trailing surrogate code unit must always be
+--   preceded by a leading surrogate code unit.

Data/Text/Lazy.hs

 {-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE BangPatterns, CPP #-}
+{-# LANGUAGE BangPatterns, MagicHash, CPP #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 -- |
 -- Module      : Data.Text.Lazy
--- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
+-- Copyright   : (c) 2009, 2010, 2012 Bryan O'Sullivan
 --
 -- License     : BSD-style
 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
     , partition
 
     -- , findSubstring
-    
+
     -- * Indexing
     , index
     , count
 import Prelude (Char, Bool(..), Maybe(..), String,
                 Eq(..), Ord(..), Ordering(..), Read(..), Show(..),
                 (&&), (||), (+), (-), (.), ($), (++),
-                div, error, flip, fmap, fromIntegral, not, otherwise)
+                error, flip, fmap, fromIntegral, not, otherwise, quot)
 import qualified Prelude as P
 #if defined(HAVE_DEEPSEQ)
 import Control.DeepSeq (NFData(..))
 import qualified Data.List as L
 import Data.Char (isSpace)
 import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf))
-#if __GLASGOW_HASKELL__ >= 612
 import Data.Data (mkNoRepType)
-#else
-import Data.Data (mkNorepType)
-#endif
 import Data.Monoid (Monoid(..))
 import Data.String (IsString(..))
 import qualified Data.Text as T
 import Data.Text.Internal (firstf, safe, textP)
 import qualified Data.Text.Util as U
 import Data.Text.Lazy.Search (indices)
+#if __GLASGOW_HASKELL__ >= 702
+import qualified GHC.CString as GHC
+#else
+import qualified GHC.Base as GHC
+#endif
+import GHC.Prim (Addr#)
 
 -- $fusion
 --
   gfoldl f z txt = z pack `f` (unpack txt)
   toConstr _     = error "Data.Text.Lazy.Text.toConstr"
   gunfold _ _    = error "Data.Text.Lazy.Text.gunfold"
-#if __GLASGOW_HASKELL__ >= 612
   dataTypeOf _   = mkNoRepType "Data.Text.Lazy.Text"
-#else
-  dataTypeOf _   = mkNorepType "Data.Text.Lazy.Text"
-#endif
 
 -- | /O(n)/ Convert a 'String' into a 'Text'.
 --
 unpack t = S.unstreamList (stream t)
 {-# INLINE [1] unpack #-}
 
+-- | /O(n)/ Convert a literal string into a Text.
+unpackCString# :: Addr# -> Text
+unpackCString# addr# = unstream (S.streamCString# addr#)
+{-# NOINLINE unpackCString# #-}
+
+{-# RULES "TEXT literal" forall a.
+    unstream (S.streamList (L.map safe (GHC.unpackCString# a)))
+      = unpackCString# a #-}
+
+{-# RULES "TEXT literal UTF8" forall a.
+    unstream (S.streamList (L.map safe (GHC.unpackCStringUtf8# a)))
+      = unpackCString# a #-}
+
 -- | /O(1)/ Convert a character into a Text.  Subject to fusion.
 -- Performs replacement on invalid scalar values.
 singleton :: Char -> Text
     | otherwise = replicateChar l c `append` t `append` replicateChar r c
   where len = length t
         d   = k - len
-        r   = d `div` 2
+        r   = d `quot` 2
         l   = d - r
 {-# INLINE center #-}
 
     | otherwise = drop' i t0
   where drop' 0 ts           = ts
         drop' _ Empty        = Empty
-        drop' n (Chunk t ts) 
+        drop' n (Chunk t ts)
             | n < len   = Chunk (T.drop (fromIntegral n) t) ts
             | otherwise = drop' (n - len) ts
             where len   = fromIntegral (T.length t)
 -- > splitOn "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
 -- > splitOn "aaa"  "aaaXaaaXaaaXaaa"  == ["","X","X","X",""]
 -- > splitOn "x"    "x"                == ["",""]
--- 
+--
 -- and
 --
 -- > intercalate s . splitOn s         == id

Data/Text/Lazy/Builder.hs

 {-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -----------------------------------------------------------------------------
 -- |
 -- Module      : Data.Text.Lazy.Builder
--- Copyright   : (c) 2010 Johan Tibell
+-- Copyright   : (c) 2013 Bryan O'Sullivan
+--               (c) 2010 Johan Tibell
 -- License     : BSD3-style (see LICENSE)
--- 
+--
 -- Maintainer  : Johan Tibell <johan.tibell@gmail.com>
 -- Stability   : experimental
 -- Portability : portable to Hugs and GHC
 -- @fromLazyText@, which construct new builders, and 'mappend', which
 -- concatenates two builders.
 --
--- To get maximum performance when building lazy @Text@ values using a builder, associate @mappend@ calls to the right.  For example, prefer
+-- To get maximum performance when building lazy @Text@ values using a
+-- builder, associate @mappend@ calls to the right.  For example,
+-- prefer
 --
 -- > singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
 --
 --
 -- > singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'
 --
--- as the latter associates @mappend@ to the left.
+-- as the latter associates @mappend@ to the left. Or, equivalently,
+-- prefer
 --
+--  > singleton 'a' <> singleton 'b' <> singleton 'c'
+--
+-- since the '<>' from recent versions of 'Data.Monoid' associates 
+-- to the right.
+
 -----------------------------------------------------------------------------
 
 module Data.Text.Lazy.Builder
    , flush
    ) where
 
-import Control.Monad.ST (ST, runST)
-import Data.Bits ((.&.))
-import Data.Monoid (Monoid(..))
-import Data.Text.Internal (Text(..))
-import Data.Text.Lazy.Internal (smallChunkSize)
-import Data.Text.Unsafe (inlineInterleaveST)
-import Data.Text.UnsafeChar (ord, unsafeWrite)
-import Data.Text.UnsafeShift (shiftR)
-import Prelude hiding (map, putChar)
-
-import qualified Data.String as String
-import qualified Data.Text as S
-import qualified Data.Text.Array as A
-import qualified Data.Text.Lazy as L
-
-------------------------------------------------------------------------
-
--- | A @Builder@ is an efficient way to build lazy @Text@ values.
--- There are several functions for constructing builders, but only one
--- to inspect them: to extract any data, you have to turn them into
--- lazy @Text@ values using @toLazyText@.
---
--- Internally, a builder constructs a lazy @Text@ by filling arrays
--- piece by piece.  As each buffer is filled, it is \'popped\' off, to
--- become a new chunk of the resulting lazy @Text@.  All this is
--- hidden from the user of the @Builder@.
-newtype Builder = Builder {
-     -- Invariant (from Data.Text.Lazy):
-     --      The lists include no null Texts.
-     runBuilder :: forall s. (Buffer s -> ST s [S.Text])
-                -> Buffer s
-                -> ST s [S.Text]
-   }
-
-instance Monoid Builder where
-   mempty  = empty
-   {-# INLINE mempty #-}
-   mappend = append
-   {-# INLINE mappend #-}
-   mconcat = foldr mappend mempty
-   {-# INLINE mconcat #-}
-
-instance String.IsString Builder where
-    fromString = fromString
-    {-# INLINE fromString #-}
-
-instance Show Builder where
-    show = show . toLazyText
-
-instance Eq Builder where
-    a == b = toLazyText a == toLazyText b
-
-instance Ord Builder where
-    a <= b = toLazyText a <= toLazyText b
-
-------------------------------------------------------------------------
-
--- | /O(1)./ The empty @Builder@, satisfying
---
---  * @'toLazyText' 'empty' = 'L.empty'@
---
-empty :: Builder
-empty = Builder (\ k buf -> k buf)
-{-# INLINE empty #-}
-
--- | /O(1)./ A @Builder@ taking a single character, satisfying
---
---  * @'toLazyText' ('singleton' c) = 'L.singleton' c@
---
-singleton :: Char -> Builder
-singleton c = writeAtMost 2 $ \ marr o ->
-    if n < 0x10000
-    then A.unsafeWrite marr o (fromIntegral n) >> return 1
-    else do
-        A.unsafeWrite marr o lo
-        A.unsafeWrite marr (o+1) hi
-        return 2
-  where n = ord c
-        m = n - 0x10000
-        lo = fromIntegral $ (m `shiftR` 10) + 0xD800
-        hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
-{-# INLINE singleton #-}
-
-------------------------------------------------------------------------
-
--- | /O(1)./ The concatenation of two builders, an associative
--- operation with identity 'empty', satisfying
---
---  * @'toLazyText' ('append' x y) = 'L.append' ('toLazyText' x) ('toLazyText' y)@
---
-append :: Builder -> Builder -> Builder
-append (Builder f) (Builder g) = Builder (f . g)
-{-# INLINE [0] append #-}
-
--- TODO: Experiment to find the right threshold.
-copyLimit :: Int
-copyLimit = 128                                 
-
--- This function attempts to merge small @Text@ values instead of
--- treating each value as its own chunk.  We may not always want this.
-
--- | /O(1)./ A @Builder@ taking a 'S.Text', satisfying
---
---  * @'toLazyText' ('fromText' t) = 'L.fromChunks' [t]@
---
-fromText :: S.Text -> Builder
-fromText t@(Text arr off l)
-    | S.null t       = empty
-    | l <= copyLimit = writeN l $ \marr o -> A.copyI marr o arr off (l+o)
-    | otherwise      = flush `append` mapBuilder (t :)
-{-# INLINE [1] fromText #-}
-
-{-# RULES
-"fromText/pack" forall s .
-        fromText (S.pack s) = fromString s
- #-}
-
--- | /O(1)./ A Builder taking a @String@, satisfying
---
---  * @'toLazyText' ('fromString' s) = 'L.fromChunks' [S.pack s]@
---
-fromString :: String -> Builder
-fromString str = Builder $ \k (Buffer p0 o0 u0 l0) ->
-    let loop !marr !o !u !l [] = k (Buffer marr o u l)
-        loop marr o u l s@(c:cs)
-            | l <= 1 = do
-                arr <- A.unsafeFreeze marr
-                let !t = Text arr o u
-                marr' <- A.new chunkSize
-                ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s)
-                return $ t : ts
-            | otherwise = do
-                n <- unsafeWrite marr (o+u) c
-                loop marr o (u+n) (l-n) cs
-    in loop p0 o0 u0 l0 str
-  where
-    chunkSize = smallChunkSize
-{-# INLINE fromString #-}
-
--- | /O(1)./ A @Builder@ taking a lazy @Text@, satisfying
---
---  * @'toLazyText' ('fromLazyText' t) = t@
---
-fromLazyText :: L.Text -> Builder
-fromLazyText ts = flush `append` mapBuilder (L.toChunks ts ++)
-{-# INLINE fromLazyText #-}
-
-------------------------------------------------------------------------
-
--- Our internal buffer type
-data Buffer s = Buffer {-# UNPACK #-} !(A.MArray s)
-                       {-# UNPACK #-} !Int  -- offset
-                       {-# UNPACK #-} !Int  -- used units
-                       {-# UNPACK #-} !Int  -- length left
-
-------------------------------------------------------------------------
-
--- | /O(n)./ Extract a lazy @Text@ from a @Builder@ with a default
--- buffer size.  The construction work takes place if and when the
--- relevant part of the lazy @Text@ is demanded.
-toLazyText :: Builder -> L.Text
-toLazyText = toLazyTextWith smallChunkSize
-
--- | /O(n)./ Extract a lazy @Text@ from a @Builder@, using the given
--- size for the initial buffer.  The construction work takes place if
--- and when the relevant part of the lazy @Text@ is demanded.
---
--- If the initial buffer is too small to hold all data, subsequent
--- buffers will be the default buffer size.
-toLazyTextWith :: Int -> Builder -> L.Text
-toLazyTextWith chunkSize m = L.fromChunks (runST $
-  newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return [])))
-
--- | /O(1)./ Pop the strict @Text@ we have constructed so far, if any,
--- yielding a new chunk in the result lazy @Text@.
-flush :: Builder
-flush = Builder $ \ k buf@(Buffer p o u l) ->
-    if u == 0
-    then k buf
-    else do arr <- A.unsafeFreeze p
-            let !b = Buffer p (o+u) 0 l
-                !t = Text arr o u
-            ts <- inlineInterleaveST (k b)
-            return $! t : ts
-
-------------------------------------------------------------------------
-
--- | Sequence an ST operation on the buffer
-withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder
-withBuffer f = Builder $ \k buf -> f buf >>= k
-{-# INLINE withBuffer #-}
-
--- | Get the size of the buffer
-withSize :: (Int -> Builder) -> Builder
-withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
-    runBuilder (f l) k buf
-{-# INLINE withSize #-}
-
--- | Map the resulting list of texts.
-mapBuilder :: ([S.Text] -> [S.Text]) -> Builder
-mapBuilder f = Builder (fmap f .)
-
-------------------------------------------------------------------------
-
--- | Ensure that there are at least @n@ many elements available.
-ensureFree :: Int -> Builder
-ensureFree !n = withSize $ \ l ->
-    if n <= l
-    then empty
-    else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize)))
-{-# INLINE [0] ensureFree #-}
-
-writeAtMost :: Int -> (forall s. A.MArray s -> Int -> ST s Int) -> Builder
-writeAtMost n f = ensureFree n `append'` withBuffer (writeBuffer f)
-{-# INLINE [0] writeAtMost #-}
-
--- | Ensure that @n@ many elements are available, and then use @f@ to
--- write some elements into the memory.
-writeN :: Int -> (forall s. A.MArray s -> Int -> ST s ()) -> Builder
-writeN n f = writeAtMost n (\ p o -> f p o >> return n)
-{-# INLINE writeN #-}
-
-writeBuffer :: (A.MArray s -> Int -> ST s Int) -> Buffer s -> ST s (Buffer s)
-writeBuffer f (Buffer p o u l) = do
-    n <- f p (o+u)
-    return $! Buffer p o (u+n) (l-n)
-{-# INLINE writeBuffer #-}
-
-newBuffer :: Int -> ST s (Buffer s)
-newBuffer size = do
-    arr <- A.new size
-    return $! Buffer arr 0 0 size
-{-# INLINE newBuffer #-}
-
-------------------------------------------------------------------------
--- Some nice rules for Builder
-
--- This function makes GHC understand that 'writeN' and 'ensureFree'
--- are *not* recursive in the precense of the rewrite rules below.
--- This is not needed with GHC 7+.
-append' :: Builder -> Builder -> Builder
-append' (Builder f) (Builder g) = Builder (f . g)
-{-# INLINE append' #-}
-
-{-# RULES
-
-"append/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
-                           (g::forall s. A.MArray s -> Int -> ST s Int) ws.
-    append (writeAtMost a f) (append (writeAtMost b g) ws) =
-        append (writeAtMost (a+b) (\marr o -> f marr o >>= \ n -> 
-                                    g marr (o+n) >>= \ m ->
-                                    let s = n+m in s `seq` return s)) ws
-
-"writeAtMost/writeAtMost" forall a b (f::forall s. A.MArray s -> Int -> ST s Int)
-                           (g::forall s. A.MArray s -> Int -> ST s Int).
-    append (writeAtMost a f) (writeAtMost b g) =
-        writeAtMost (a+b) (\marr o -> f marr o >>= \ n -> 
-                            g marr (o+n) >>= \ m ->
-                            let s = n+m in s `seq` return s)
-
-"ensureFree/ensureFree" forall a b .
-    append (ensureFree a) (ensureFree b) = ensureFree (max a b)
-
-"flush/flush"
-    append flush flush = flush
-
- #-}
+import Data.Text.Lazy.Builder.Internal

Data/Text/Lazy/Builder/Int.hs

-{-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, RankNTypes, UnboxedTuples #-}
+#if __GLASGOW_HASKELL__ >= 702
+{-# LANGUAGE Trustworthy #-}
+#endif
 
 -- Module:      Data.Text.Lazy.Builder.Int
--- Copyright:   (c) 2011 MailRank, Inc.
+-- Copyright:   (c) 2013 Bryan O'Sullivan
+--              (c) 2011 MailRank, Inc.
 -- License:     BSD3
 -- Maintainer:  Bryan O'Sullivan <bos@serpentine.com>
 -- Stability:   experimental
 
 import Data.Int (Int8, Int16, Int32, Int64)
 import Data.Monoid (mempty)
+import qualified Data.ByteString.Unsafe as B
 import Data.Text.Lazy.Builder.Functions ((<>), i2d)
-import Data.Text.Lazy.Builder
+import Data.Text.Lazy.Builder.Internal
+import Data.Text.Lazy.Builder.Int.Digits (digits)
+import Data.Text.Array
 import Data.Word (Word, Word8, Word16, Word32, Word64)
 import GHC.Base (quotInt, remInt)
 import GHC.Num (quotRemInteger)
 import GHC.Types (Int(..))
+import Control.Monad.ST
 
 #ifdef  __GLASGOW_HASKELL__
-# if __GLASGOW_HASKELL__ < 611
-import GHC.Integer.Internals
+# if defined(INTEGER_GMP)
+import GHC.Integer.GMP.Internals
+# elif defined(INTEGER_SIMPLE)
+import GHC.Integer
 # else
-import GHC.Integer.GMP.Internals
+# error "You need to use either GMP or integer-simple."
 # endif
 #endif
 
-#ifdef INTEGER_GMP
+#if defined(INTEGER_GMP) || defined(INTEGER_SIMPLE)
 # define PAIR(a,b) (# a,b #)
 #else
 # define PAIR(a,b) (a,b)
 #endif
 
 decimal :: Integral a => a -> Builder
-{-# SPECIALIZE decimal :: Int -> Builder #-}
 {-# SPECIALIZE decimal :: Int8 -> Builder #-}
-{-# SPECIALIZE decimal :: Int16 -> Builder #-}
-{-# SPECIALIZE decimal :: Int32 -> Builder #-}
-{-# SPECIALIZE decimal :: Int64 -> Builder #-}
-{-# SPECIALIZE decimal :: Word -> Builder #-}
-{-# SPECIALIZE decimal :: Word8 -> Builder #-}
-{-# SPECIALIZE decimal :: Word16 -> Builder #-}
-{-# SPECIALIZE decimal :: Word32 -> Builder #-}
-{-# SPECIALIZE decimal :: Word64 -> Builder #-}
+{-# RULES "decimal/Int" decimal = boundedDecimal :: Int -> Builder #-}
+{-# RULES "decimal/Int16" decimal = boundedDecimal :: Int16 -> Builder #-}
+{-# RULES "decimal/Int32" decimal = boundedDecimal :: Int32 -> Builder #-}
+{-# RULES "decimal/Int64" decimal = boundedDecimal :: Int64 -> Builder #-}
+{-# RULES "decimal/Word" decimal = positive :: Word -> Builder #-}
+{-# RULES "decimal/Word8" decimal = positive :: Word8 -> Builder #-}
+{-# RULES "decimal/Word16" decimal = positive :: Word16 -> Builder #-}
+{-# RULES "decimal/Word32" decimal = positive :: Word32 -> Builder #-}
+{-# RULES "decimal/Word64" decimal = positive :: Word64 -> Builder #-}
 {-# RULES "decimal/Integer" decimal = integer 10 :: Integer -> Builder #-}
-decimal i
-    | i < 0     = singleton '-' <> go (-i)
-    | otherwise = go i
-  where
-    go n | n < 10    = digit n
-         | otherwise = go (n `quot` 10) <> digit (n `rem` 10)
+decimal i = decimal' (<= -128) i
+
+boundedDecimal :: (Integral a, Bounded a) => a -> Builder
+{-# SPECIALIZE boundedDecimal :: Int -> Builder #-}
+{-# SPECIALIZE boundedDecimal :: Int8 -> Builder #-}
+{-# SPECIALIZE boundedDecimal :: Int16 -> Builder #-}
+{-# SPECIALIZE boundedDecimal :: Int32 -> Builder #-}
+{-# SPECIALIZE boundedDecimal :: Int64 -> Builder #-}
+boundedDecimal i = decimal' (== minBound) i
+
+decimal' :: (Integral a) => (a -> Bool) -> a -> Builder
+{-# INLINE decimal' #-}
+decimal' p i
+    | i < 0 = if p i
+              then let (q, r) = i `quotRem` 10
+                       qq = -q
+                       !n = countDigits qq
+                   in writeN (n + 2) $ \marr off -> do
+                       unsafeWrite marr off minus
+                       posDecimal marr (off+1) n qq
+                       unsafeWrite marr (off+n+1) (i2w (-r))
+              else let j = -i
+                       !n = countDigits j
+                   in writeN (n + 1) $ \marr off ->
+                       unsafeWrite marr off minus >> posDecimal marr (off+1) n j
+    | otherwise = positive i
+
+positive :: (Integral a) => a -> Builder
+{-# SPECIALIZE positive :: Int -> Builder #-}
+{-# SPECIALIZE positive :: Int8 -> Builder #-}
+{-# SPECIALIZE positive :: Int16 -> Builder #-}
+{-# SPECIALIZE positive :: Int32 -> Builder #-}
+{-# SPECIALIZE positive :: Int64 -> Builder #-}
+{-# SPECIALIZE positive :: Word -> Builder #-}
+{-# SPECIALIZE positive :: Word8 -> Builder #-}
+{-# SPECIALIZE positive :: Word16 -> Builder #-}
+{-# SPECIALIZE positive :: Word32 -> Builder #-}
+{-# SPECIALIZE positive :: Word64 -> Builder #-}
+positive i
+    | i < 10    = writeN 1 $ \marr off -> unsafeWrite marr off (i2w i)
+    | otherwise = let !n = countDigits i
+                  in writeN n $ \marr off -> posDecimal marr off n i
+
+posDecimal :: (Integral a) =>
+              forall s. MArray s -> Int -> Int -> a -> ST s ()
+{-# INLINE posDecimal #-}
+posDecimal marr off0 ds v0 = go (off0 + ds - 1) v0
+  where go off v
+           | v >= 100 = do
+               let (q, r) = v `quotRem` 100
+               write2 off r
+               go (off - 2) q
+           | v < 10    = unsafeWrite marr off (i2w v)
+           | otherwise = write2 off v
+        write2 off i0 = do
+          let i = fromIntegral i0; j = i + i
+          unsafeWrite marr off $ get (j + 1)
+          unsafeWrite marr (off - 1) $ get j
+        get = fromIntegral . B.unsafeIndex digits
+
+minus, zero :: Word16
+{-# INLINE minus #-}
+{-# INLINE zero #-}
+minus = 45
+zero = 48
+
+i2w :: (Integral a) => a -> Word16
+{-# INLINE i2w #-}
+i2w v = zero + fromIntegral v
+
+countDigits :: (Integral a) => a -> Int
+{-# INLINE countDigits #-}
+countDigits v0 = go 1 (fromIntegral v0 :: Word64)
+  where go !k v
+           | v < 10    = k
+           | v < 100   = k + 1
+           | v < 1000  = k + 2
+           | v < 1000000000000 =
+               k + if v < 100000000
+                   then if v < 1000000
+                        then if v < 10000
+                             then 3
+                             else 4 + fin v 100000
+                        else 6 + fin v 10000000
+                   else if v < 10000000000
+                        then 8 + fin v 1000000000
+                        else 10 + fin v 100000000000
+           | otherwise = go (k + 12) (v `quot` 1000000000000)
+        fin v n = if v >= n then 1 else 0
 
 hexadecimal :: Integral a => a -> Builder
 {-# SPECIALIZE hexadecimal :: Int -> Builder #-}
 {-# SPECIALIZE hexadecimal :: Word16 -> Builder #-}
 {-# SPECIALIZE hexadecimal :: Word32 -> Builder #-}
 {-# SPECIALIZE hexadecimal :: Word64 -> Builder #-}
-{-# RULES "hexadecimal/Integer" hexadecimal = integer 16 :: Integer -> Builder #-}
+{-# RULES "hexadecimal/Integer"
+    hexadecimal = hexInteger :: Integer -> Builder #-}
 hexadecimal i
-    | i < 0     = singleton '-' <> go (-i)
+    | i < 0     = error hexErrMsg
     | otherwise = go i
   where
     go n | n < 16    = hexDigit n
          | otherwise = go (n `quot` 16) <> hexDigit (n `rem` 16)
 
-digit :: Integral a => a -> Builder
-digit n = singleton $! i2d (fromIntegral n)
-{-# INLINE digit #-}
+hexInteger :: Integer -> Builder
+hexInteger i
+    | i < 0     = error hexErrMsg
+    | otherwise = integer 16 i
+
+hexErrMsg :: String
+hexErrMsg = "Data.Text.Lazy.Builder.Int.hexadecimal: applied to negative number"
 
 hexDigit :: Integral a => a -> Builder
 hexDigit n
 data T = T !Integer !Int
 
 integer :: Int -> Integer -> Builder
+#ifdef INTEGER_GMP
 integer 10 (S# i#) = decimal (I# i#)
 integer 16 (S# i#) = hexadecimal (I# i#)
+#else
+integer 10 i = decimal i
+integer 16 i = hexadecimal i
+#endif
 integer base i
     | i < 0     = singleton '-' <> go (-i)
     | otherwise = go i
     pblock = loop maxDigits
       where
         loop !d !n
-            | d == 1    = digit n
-            | otherwise = loop (d-1) q <> digit r
+            | d == 1    = hexDigit n
+            | otherwise = loop (d-1) q <> hexDigit r
             where q = n `quotInt` base
                   r = n `remInt` base

Data/Text/Lazy/Builder/Int/Digits.hs

+{-# LANGUAGE OverloadedStrings #-}
+
+-- Module:      Data.Text.Lazy.Builder.Int.Digits
+-- Copyright:   (c) 2013 Bryan O'Sullivan
+-- License:     BSD3
+-- Maintainer:  Bryan O'Sullivan <bos@serpentine.com>
+-- Stability:   experimental
+-- Portability: portable
+--
+-- This module exists because the C preprocessor does things that we
+-- shall not speak of when confronted with Haskell multiline strings.
+
+module Data.Text.Lazy.Builder.Int.Digits (digits) where
+
+import Data.ByteString.Char8 (ByteString)
+
+digits :: ByteString
+digits = "0001020304050607080910111213141516171819\
+         \2021222324252627282930313233343536373839\
+         \4041424344454647484950515253545556575859\
+         \6061626364656667686970717273747576777879\
+         \8081828384858687888990919293949596979899"

Data/Text/Lazy/Builder/Internal.hs

+{-# LANGUAGE BangPatterns, CPP, Rank2Types #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module      : Data.Text.Lazy.Builder.Internal
+-- Copyright   : (c) 2013 Bryan O'Sullivan
+--               (c) 2010 Johan Tibell
+-- License     : BSD3-style (see LICENSE)
+--
+-- Maintainer  : Johan Tibell <johan.tibell@gmail.com>
+-- Stability   : experimental
+-- Portability : portable to Hugs and GHC
+--
+-- Efficient construction of lazy @Text@ values.  The principal
+-- operations on a @Builder@ are @singleton@, @fromText@, and
+-- @fromLazyText@, which construct new builders, and 'mappend', which
+-- concatenates two builders.
+--
+-- To get maximum performance when building lazy @Text@ values using a
+-- builder, associate @mappend@ calls to the right.  For example,
+-- prefer
+--
+-- > singleton 'a' `mappend` (singleton 'b' `mappend` singleton 'c')
+--
+-- to
+--
+-- > singleton 'a' `mappend` singleton 'b' `mappend` singleton 'c'
+--
+-- as the latter associates @mappend@ to the left.
+--
+-----------------------------------------------------------------------------
+
+module Data.Text.Lazy.Builder.Internal
+   ( -- * Public API
+     -- ** The Builder type
+     Builder
+   , toLazyText
+   , toLazyTextWith
+
+     -- ** Constructing Builders
+   , singleton
+   , fromText
+   , fromLazyText
+   , fromString
+
+     -- ** Flushing the buffer state
+   , flush
+