Commits

Bryan O'Sullivan  committed 16ff63b

Split encoding support out into new modules

  • Participants
  • Parent commits 4762cdc

Comments (0)

Files changed (6)

File Data/Text.hs

 
     -- * Types
       Text
-    , Encoding(..)
 
     -- * Creation and elimination
     , pack
     , unpack
     , singleton
     , empty
-    , encode
-    , decode
 
     -- * Basic interface
     , cons
 
     -- * Zipping and unzipping
     , zipWith
-
-    -- * I/O
-    , readFile
     ) where
 
 import Prelude (Char, Bool, Int, Maybe, String,
                 Eq, (==), (++), error,
                 Show, showsPrec,
                 Read, readsPrec,
-                (&&), (||), (+), (-), (<), (>), (<=), (>=), (.), (>>=),
-                return, otherwise,
-                IO, FilePath)
+                (&&), (||), (+), (-), (<), (>), (<=), (>=), (.),
+                return, otherwise)
 import Data.Char (isSpace)
 import Control.Monad.ST (ST)
 import qualified Data.Text.Array as A
-import qualified Data.ByteString as B
-import Data.ByteString (ByteString)
 import qualified Data.List as L
 import Data.Monoid (Monoid(..))
 import Data.Word (Word16)
 import Data.String (IsString(..))
 
 import qualified Data.Text.Fusion as S
-import Data.Text.Fusion (Stream(..), Step(..), Encoding(..),
-                         stream, unstream, stream_bs, unstream_bs, restream)
+import Data.Text.Fusion (Stream(..), Step(..), stream, unstream)
 import Data.Text.Internal (Text(..), empty)
 import qualified Prelude as P
 import Data.Text.UnsafeChar (unsafeChr)
       next []     = Done
 {-# INLINE [1] singleton #-}
 
-decode        :: Encoding -> ByteString -> Text
-decode enc bs = unstream (stream_bs enc bs)
-{-# INLINE decode #-}
-
-encode         :: Encoding -> Text -> ByteString
-encode enc txt = unstream_bs (restream enc (stream txt))
-{-# INLINE encode #-}
-
 -- -----------------------------------------------------------------------------
 -- * Basic functions
 
 zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text
 zipWith f t1 t2 = unstream (S.zipWith f (stream t1) (stream t2))
 
--- File I/O
-
-readFile :: Encoding -> FilePath -> IO Text
-readFile enc f = B.readFile f >>= return . unstream . stream_bs enc
-{-# INLINE [1] readFile #-}
-
 words :: Text -> [Text]
 words (Text arr off len) = loop0 off off
     where

File Data/Text/Encoding.hs

+-- |
+-- Module      : Data.Text.Encoding
+-- Copyright   : (c) Tom Harper 2008-2009,
+--               (c) Bryan O'Sullivan 2009,
+--               (c) Duncan Coutts 2009
+--
+-- License     : BSD-style
+-- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
+--               duncan@haskell.org
+-- Stability   : experimental
+-- Portability : portable
+--
+-- Functions for converting 'Text' values to and from 'ByteString',
+-- using several common encodings.
+
+module Data.Text.Encoding
+    (
+    -- * Decoding ByteStrings to Text
+      decodeASCII
+    , decodeUtf8
+    , decodeUtf16LE
+    , decodeUtf16BE
+    , decodeUtf32LE
+    , decodeUtf32BE
+
+    -- * Encoding Text to ByteStrings
+    , encodeASCII
+    , encodeUtf8
+    , encodeUtf16LE
+    , encodeUtf16BE
+    , encodeUtf32LE
+    , encodeUtf32BE
+    ) where
+    
+import Data.ByteString (ByteString)
+import qualified Data.Text.Fusion as F
+import qualified Data.Text.Encoding.Fusion as E
+import Data.Text.Internal (Text)
+
+-- | Decode a 'ByteString' containing 7-bit ASCII encoded text.
+decodeASCII :: ByteString -> Text
+decodeASCII bs = F.unstream (E.streamASCII bs)
+{-# INLINE decodeASCII #-}
+
+-- | Decode a 'ByteString' containing UTF-8 encoded text.
+decodeUtf8 :: ByteString -> Text
+decodeUtf8 bs = F.unstream (E.streamUtf8 bs)
+{-# INLINE decodeUtf8 #-}
+
+-- | Encode text using a 7-bit ASCII representation. /Note/: non-ASCII
+-- characters in the input 'Text' will be /truncated/.
+encodeASCII :: Text -> ByteString
+encodeASCII txt = E.unstream (E.restreamASCII (F.stream txt))
+{-# INLINE encodeASCII #-}
+
+-- | Encode text using UTF-8 encoding.
+encodeUtf8 :: Text -> ByteString
+encodeUtf8 txt = E.unstream (E.restreamUtf8 (F.stream txt))
+{-# INLINE encodeUtf8 #-}
+
+-- | Decode text from little endian UTF-16 encoding.
+decodeUtf16LE :: ByteString -> Text
+decodeUtf16LE bs = F.unstream (E.streamUtf16LE bs)
+{-# INLINE decodeUtf16LE #-}
+
+-- | Decode text from big endian UTF-16 encoding.
+decodeUtf16BE :: ByteString -> Text
+decodeUtf16BE bs = F.unstream (E.streamUtf16BE bs)
+{-# INLINE decodeUtf16BE #-}
+
+-- | Encode text using little endian UTF-16 encoding.
+encodeUtf16LE :: Text -> ByteString
+encodeUtf16LE txt = E.unstream (E.restreamUtf16LE (F.stream txt))
+{-# INLINE encodeUtf16LE #-}
+
+-- | Encode text using big endian UTF-16 encoding.
+encodeUtf16BE :: Text -> ByteString
+encodeUtf16BE txt = E.unstream (E.restreamUtf16BE (F.stream txt))
+{-# INLINE encodeUtf16BE #-}
+
+-- | Decode text from little endian UTF-32 encoding.
+decodeUtf32LE :: ByteString -> Text
+decodeUtf32LE bs = F.unstream (E.streamUtf32LE bs)
+{-# INLINE decodeUtf32LE #-}
+
+-- | Decode text from big endian UTF-32 encoding.
+decodeUtf32BE :: ByteString -> Text
+decodeUtf32BE bs = F.unstream (E.streamUtf32LE bs)
+{-# INLINE decodeUtf32BE #-}
+
+-- | Encode text using little endian UTF-32 encoding.
+encodeUtf32LE :: Text -> ByteString
+encodeUtf32LE txt = E.unstream (E.restreamUtf32LE (F.stream txt))
+{-# INLINE encodeUtf32LE #-}
+
+-- | Encode text using big endian UTF-32 encoding.
+encodeUtf32BE :: Text -> ByteString
+encodeUtf32BE txt = E.unstream (E.restreamUtf32BE (F.stream txt))
+{-# INLINE encodeUtf32BE #-}

File Data/Text/Encoding/Fusion.hs

+{-# LANGUAGE BangPatterns #-}
+
+-- |
+-- Module      : Data.Text.Encoding
+-- Copyright   : (c) Tom Harper 2008-2009,
+--               (c) Bryan O'Sullivan 2009,
+--               (c) Duncan Coutts 2009
+--
+-- License     : BSD-style
+-- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
+--               duncan@haskell.org
+-- Stability   : experimental
+-- Portability : portable
+--
+-- Fusible 'Stream'-oriented functions for converting between 'Text'
+-- and several common encodings.
+
+module Data.Text.Encoding.Fusion
+    (
+    -- * Streaming
+      streamASCII
+    , streamUtf8
+    , streamUtf16LE
+    , streamUtf16BE
+    , streamUtf32LE
+    , streamUtf32BE
+
+    -- * Unstreaming
+    , unstream
+
+    -- * Restreaming
+    -- Restreaming is the act of converting from one 'Stream'
+    -- representation to another.
+    , restreamASCII
+    , restreamUtf8
+    , restreamUtf16LE
+    , restreamUtf16BE
+    , restreamUtf32LE
+    , restreamUtf32BE
+    ) where
+
+import Control.Exception (assert)
+import Data.Bits (shiftL, shiftR, (.&.))
+import Data.ByteString as B
+import Data.ByteString.Internal (ByteString(..), mallocByteString, memcpy)
+import Data.Char (ord)
+import Data.Text.Fusion (Step(..), Stream(..))
+import Data.Text.UnsafeChar (unsafeChr, unsafeChr8, unsafeChr32)
+import Data.Word (Word8, Word16, Word32)
+import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
+import Foreign.Storable (pokeByteOff)
+import System.IO.Unsafe (unsafePerformIO)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Unsafe as B
+import qualified Data.Text.Utf16 as U16
+import qualified Data.Text.Utf32 as U32
+import qualified Data.Text.Utf8 as U8
+
+data T4 a b c d = T4 !a !b !c !d
+
+streamASCII :: ByteString -> Stream Char
+streamASCII bs = Stream next 0 l
+    where
+      l = B.length bs
+      {-# INLINE next #-}
+      next i
+          | i >= l    = Done
+          | otherwise = Yield (unsafeChr8 x1) (i+1)
+          where
+            x1 = B.unsafeIndex bs i
+{-# INLINE [0] streamASCII #-}
+
+-- | /O(n) Convert a ByteString into a Stream Char, using the specified encoding standard.
+streamUtf8 :: ByteString -> Stream Char
+streamUtf8 bs = Stream next 0 l
+    where
+      l = B.length bs
+      {-# INLINE next #-}
+      next i
+          | i >= l = Done
+          | U8.validate1 x1 = Yield (unsafeChr8 x1) (i+1)
+          | i+1 < l && U8.validate2 x1 x2 = Yield (U8.chr2 x1 x2) (i+2)
+          | i+2 < l && U8.validate3 x1 x2 x3 = Yield (U8.chr3 x1 x2 x3) (i+3)
+          | i+3 < l && U8.validate4 x1 x2 x3 x4 = Yield (U8.chr4 x1 x2 x3 x4) (i+4)
+          | otherwise = error "bsStream: bad UTF-8 stream"
+          where
+            x1 = idx i
+            x2 = idx (i + 1)
+            x3 = idx (i + 2)
+            x4 = idx (i + 3)
+            idx = B.unsafeIndex bs
+{-# INLINE [0] streamUtf8 #-}
+
+streamUtf16LE :: ByteString -> Stream Char
+streamUtf16LE bs = Stream next 0 l
+    where
+      l = B.length bs
+      {-# INLINE next #-}
+      next i
+          | i >= l                         = Done
+          | i+1 < l && U16.validate1 x1    = Yield (unsafeChr x1) (i+2)
+          | i+3 < l && U16.validate2 x1 x2 = Yield (U16.chr2 x1 x2) (i+4)
+          | otherwise = error $ "bsStream: bad UTF-16LE stream"
+          where
+            x1    = (shiftL (idx (i + 1)) 8) + (idx i)
+            x2    = (shiftL (idx (i + 3)) 8) + (idx (i + 2))
+            idx = fromIntegral . B.unsafeIndex bs :: Int -> Word16
+{-# INLINE [0] streamUtf16LE #-}
+
+streamUtf16BE :: ByteString -> Stream Char
+streamUtf16BE bs = Stream next 0 l
+    where
+      l = B.length bs
+      {-# INLINE next #-}
+      next i
+          | i >= l                         = Done
+          | i+1 < l && U16.validate1 x1    = Yield (unsafeChr x1) (i+2)
+          | i+3 < l && U16.validate2 x1 x2 = Yield (U16.chr2 x1 x2) (i+4)
+          | otherwise = error $ "bsStream: bad UTF16-BE stream "
+          where
+            x1    = (shiftL (idx i) 8) + (idx (i + 1))
+            x2    = (shiftL (idx (i + 2)) 8) + (idx (i + 3))
+            idx = fromIntegral . B.unsafeIndex bs :: Int -> Word16
+{-# INLINE [0] streamUtf16BE #-}
+
+streamUtf32BE :: ByteString -> Stream Char
+streamUtf32BE bs = Stream next 0 l
+    where
+      l = B.length bs
+      {-# INLINE next #-}
+      next i
+          | i >= l                    = Done
+          | i+3 < l && U32.validate x = Yield (unsafeChr32 x) (i+4)
+          | otherwise                 = error "bsStream: bad UTF-32BE stream"
+          where
+            x     = shiftL x1 24 + shiftL x2 16 + shiftL x3 8 + x4
+            x1    = idx i
+            x2    = idx (i+1)
+            x3    = idx (i+2)
+            x4    = idx (i+3)
+            idx = fromIntegral . B.unsafeIndex bs :: Int -> Word32
+{-# INLINE [0] streamUtf32BE #-}
+
+streamUtf32LE :: ByteString -> Stream Char
+streamUtf32LE bs = Stream next 0 l
+    where
+      l = B.length bs
+      {-# INLINE next #-}
+      next i
+          | i >= l                    = Done
+          | i+3 < l && U32.validate x = Yield (unsafeChr32 x) (i+4)
+          | otherwise                 = error "bsStream: bad UTF-32LE stream"
+          where
+            x     = shiftL x4 24 + shiftL x3 16 + shiftL x2 8 + x1
+            x1    = idx i
+            x2    = idx $ i+1
+            x3    = idx $ i+2
+            x4    = idx $ i+3
+            idx = fromIntegral . B.unsafeIndex bs :: Int -> Word32
+{-# INLINE [0] streamUtf32LE #-}
+
+restreamASCII :: Stream Char -> Stream Word8
+restreamASCII (Stream next0 s0 len) =  Stream next s0 (len*2)
+    where
+      next !s = case next0 s of
+                  Done -> Done
+                  Skip s' -> Skip s'
+                  Yield x xs -> Yield x' xs
+                      where x' = fromIntegral (ord x) :: Word8
+{-# INLINE restreamASCII #-}
+
+-- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
+restreamUtf8 :: Stream Char -> Stream Word8
+restreamUtf8 (Stream next0 s0 len) =
+    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
+    where
+      {-# INLINE next #-}
+      next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
+                  Done              -> Done
+                  Skip s'           -> Skip (T4 (Just s') Nothing Nothing Nothing)
+                  Yield x xs
+                      | n <= 0x7F   -> Yield c         (T4 (Just xs) Nothing Nothing Nothing)
+                      | n <= 0x07FF -> Yield (fst c2)  (T4 (Just xs) (Just $ snd c2) Nothing Nothing)
+                      | n <= 0xFFFF -> Yield (fst3 c3) (T4 (Just xs) (Just $ snd3 c3) (Just $ trd3 c3) Nothing)
+                      | otherwise   -> Yield (fst4 c4) (T4 (Just xs) (Just $ snd4 c4) (Just $ trd4 c4) (Just $ fth4 c4))
+                      where
+                        n  = ord x
+                        c  = fromIntegral n
+                        c2 = U8.ord2 x
+                        c3 = U8.ord3 x
+                        c4 = U8.ord4 x
+                        fst3 (x1,_,_)   = x1
+                        snd3 (_,x2,_)   = x2
+                        trd3 (_,_,x3)   = x3
+                        fst4 (x1,_,_,_) = x1
+                        snd4 (_,x2,_,_) = x2
+                        trd4 (_,_,x3,_) = x3
+                        fth4 (_,_,_,x4) = x4
+      next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
+      next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
+      next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
+      next _ = internalError "restreamUtf8"
+{-# INLINE restreamUtf8 #-}
+
+restreamUtf16BE :: Stream Char -> Stream Word8
+restreamUtf16BE (Stream next0 s0 len) =
+    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
+    where
+      {-# INLINE next #-}
+      next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
+          Done -> Done
+          Skip s' -> Skip (T4 (Just s') Nothing Nothing Nothing)
+          Yield x xs
+              | n < 0x10000 -> Yield (fromIntegral $ shiftR n 8) (T4 (Just xs) (Just (fromIntegral n)) Nothing Nothing)
+              | otherwise   -> Yield c1                          (T4 (Just xs) (Just c2) (Just c3) (Just c4))
+              where
+                n  = ord x
+                n1 = n - 0x10000
+                c1 = fromIntegral (shiftR n1 18 + 0xD8)
+                c2 = fromIntegral (shiftR n1 10)
+                n2 = n1 .&. 0x3FF
+                c3 = fromIntegral (shiftR n2 8 + 0xDC)
+                c4 = fromIntegral n2
+      next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
+      next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
+      next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
+      next _ = internalError "restreamUtf16BE"
+{-# INLINE restreamUtf16BE #-}
+
+restreamUtf16LE :: Stream Char -> Stream Word8
+restreamUtf16LE (Stream next0 s0 len) =
+    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
+    where
+      {-# INLINE next #-}
+      next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
+          Done -> Done
+          Skip s' -> Skip (T4 (Just s') Nothing Nothing Nothing)
+          Yield x xs
+              | n < 0x10000 -> Yield (fromIntegral n) (T4 (Just xs) (Just (fromIntegral $ shiftR n 8)) Nothing Nothing)
+              | otherwise   -> Yield c1                          (T4 (Just xs) (Just c2) (Just c3) (Just c4))
+              where
+                n  = ord x
+                n1 = n - 0x10000
+                c2 = fromIntegral (shiftR n1 18 + 0xD8)
+                c1 = fromIntegral (shiftR n1 10)
+                n2 = n1 .&. 0x3FF
+                c4 = fromIntegral (shiftR n2 8 + 0xDC)
+                c3 = fromIntegral n2
+      next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
+      next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
+      next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
+      next _ = internalError "restreamUtf16LE"
+{-# INLINE restreamUtf16LE #-}
+
+restreamUtf32BE :: Stream Char -> Stream Word8
+restreamUtf32BE (Stream next0 s0 len) =
+    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
+    where
+    {-# INLINE next #-}
+    next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
+        Done       -> Done
+        Skip s'    -> Skip (T4 (Just s') Nothing Nothing Nothing)
+        Yield x xs -> Yield c1 (T4 (Just xs) (Just c2) (Just c3) (Just c4))
+          where
+            n  = ord x
+            c1 = fromIntegral $ shiftR n 24
+            c2 = fromIntegral $ shiftR n 16
+            c3 = fromIntegral $ shiftR n 8
+            c4 = fromIntegral n
+    next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
+    next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
+    next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
+    next _ = internalError "restreamUtf32BE"
+{-# INLINE restreamUtf32BE #-}
+
+restreamUtf32LE :: Stream Char -> Stream Word8
+restreamUtf32LE (Stream next0 s0 len) =
+    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
+    where
+    {-# INLINE next #-}
+    next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
+        Done       -> Done
+        Skip s'    -> Skip (T4 (Just s') Nothing Nothing Nothing)
+        Yield x xs -> Yield c1 (T4 (Just xs) (Just c2) (Just c3) (Just c4))
+          where
+            n  = ord x
+            c4 = fromIntegral $ shiftR n 24
+            c3 = fromIntegral $ shiftR n 16
+            c2 = fromIntegral $ shiftR n 8
+            c1 = fromIntegral n
+    next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
+    next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
+    next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
+    next _ = internalError "restreamUtf32LE"
+{-# INLINE restreamUtf32LE #-}
+
+
+-- | /O(n)/ Convert a 'Stream' 'Word8' to a 'ByteString'.
+unstream :: Stream Word8 -> ByteString
+unstream (Stream next s0 len) = unsafePerformIO $ do
+    fp0 <- mallocByteString len
+    loop fp0 len 0 s0
+    where
+      loop !fp !n !off !s = case next s of
+          Done -> trimUp fp n off
+          Skip s' -> loop fp n off s'
+          Yield x s'
+              | n == off -> realloc fp n off s' x
+              | otherwise -> do
+            withForeignPtr fp $ \p -> pokeByteOff p off x
+            loop fp n (off+1) s'
+      {-# NOINLINE realloc #-}
+      realloc fp n off s x = do
+        let n' = n+n
+        fp' <- copy0 fp n n'
+        withForeignPtr fp' $ \p -> pokeByteOff p off x
+        loop fp' n' (off+1) s
+      {-# NOINLINE trimUp #-}
+      trimUp fp _ off = return $! PS fp 0 off
+      copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
+      copy0 !src !srcLen !destLen = assert (srcLen <= destLen) $ do
+          dest <- mallocByteString destLen
+          withForeignPtr src  $ \src'  ->
+              withForeignPtr dest $ \dest' ->
+                  memcpy dest' src' (fromIntegral destLen)
+          return dest
+
+internalError :: String -> a
+internalError func =
+    error $ "Data.Text.Encoding.Fusion." ++ func ++ ": internal error"

File Data/Text/Fusion.hs

     -- * Types
       Stream(..)
     , Step(..)
-    , Encoding(..)
 
     -- * Creation and elimination
     , stream
     , unstream
-    , stream_bs
-    , unstream_bs
-    , restream
 
     -- * Basic interface
     , cons
      foldl1, foldr1, concatMap, any, all, maximum, minimum, take, drop,
      takeWhile, dropWhile, elem, zipWith)
 import Data.Char (ord)
-import Control.Exception (assert)
 import Control.Monad (liftM2)
 import Control.Monad.ST (runST, ST)
-import Data.Bits (shiftL, shiftR, (.&.))
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Unsafe as B
-import Data.ByteString.Internal (ByteString(..), mallocByteString, memcpy)
+import Data.Bits (shiftR, (.&.))
 import qualified Data.List as L
-import Data.Word (Word8, Word16, Word32)
-import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
-import Foreign.Storable (pokeByteOff)
+import Data.Word (Word16)
 import GHC.Exts (Int(..), (+#))
-import System.IO.Unsafe (unsafePerformIO)
 import Data.Text.Internal (Text(..), empty)
-import Data.Text.UnsafeChar (unsafeChr, unsafeChr8, unsafeChr32)
+import Data.Text.UnsafeChar (unsafeChr)
 import qualified Data.Text.Array as A
-import qualified Data.Text.Utf8 as U8
 import qualified Data.Text.Utf16 as U16
-import qualified Data.Text.Utf32 as U32
 
 default(Int)
 
 infixl 2 :!:
 data PairS a b = !a :!: !b
 
-data T4 a b c d = T4 !a !b !c !d
-
 data Switch = S1 | S2
 
 data Stream a = forall s. Stream (s -> Step s a) !s {-# UNPACK #-}!Int
               | Skip !s
               | Yield !a !s
 
-data Encoding = ASCII | Utf8 | Utf16BE | Utf16LE | Utf32BE | Utf32LE
-              deriving (Read, Show, Eq)
-
 -- | /O(n)/ Convert a Text into a Stream Char.
 stream :: Text -> Stream Char
 stream (Text arr off len) = Stream next off len
                      r :: Word16
                      r = fromIntegral $ (m .&. (0x3FF :: Int)) + (0xDC00 :: Int)
 {-# INLINE [0] unstream #-}
+{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
 
 
 copy :: A.MArray s Word16 -> A.MArray s Word16 -> ST s ()
                                           cmp (next1 s1') (next2 s2')
 {-# SPECIALISE eq :: Stream Char -> Stream Char -> Bool #-}
 
--- | /O(n) Convert a ByteString into a Stream Char, using the specified encoding standard.
-stream_bs :: Encoding -> ByteString -> Stream Char
-stream_bs ASCII bs = Stream next 0 l
-    where
-      l = B.length bs
-      {-# INLINE next #-}
-      next i
-          | i >= l    = Done
-          | otherwise = Yield (unsafeChr8 x1) (i+1)
-          where
-            x1 = B.unsafeIndex bs i
-stream_bs Utf8 bs = Stream next 0 l
-    where
-      l = B.length bs
-      {-# INLINE next #-}
-      next i
-          | i >= l = Done
-          | U8.validate1 x1 = Yield (unsafeChr8 x1) (i+1)
-          | i+1 < l && U8.validate2 x1 x2 = Yield (U8.chr2 x1 x2) (i+2)
-          | i+2 < l && U8.validate3 x1 x2 x3 = Yield (U8.chr3 x1 x2 x3) (i+3)
-          | i+3 < l && U8.validate4 x1 x2 x3 x4 = Yield (U8.chr4 x1 x2 x3 x4) (i+4)
-          | otherwise = error "bsStream: bad UTF-8 stream"
-          where
-            x1 = idx i
-            x2 = idx (i + 1)
-            x3 = idx (i + 2)
-            x4 = idx (i + 3)
-            idx = B.unsafeIndex bs
-stream_bs Utf16LE bs = Stream next 0 l
-    where
-      l = B.length bs
-      {-# INLINE next #-}
-      next i
-          | i >= l                         = Done
-          | i+1 < l && U16.validate1 x1    = Yield (unsafeChr x1) (i+2)
-          | i+3 < l && U16.validate2 x1 x2 = Yield (U16.chr2 x1 x2) (i+4)
-          | otherwise = error $ "bsStream: bad UTF-16LE stream"
-          where
-            x1    = (shiftL (idx (i + 1)) 8) + (idx i)
-            x2    = (shiftL (idx (i + 3)) 8) + (idx (i + 2))
-            idx = fromIntegral . B.unsafeIndex bs :: Int -> Word16
-stream_bs Utf16BE bs = Stream next 0 l
-    where
-      l = B.length bs
-      {-# INLINE next #-}
-      next i
-          | i >= l                         = Done
-          | i+1 < l && U16.validate1 x1    = Yield (unsafeChr x1) (i+2)
-          | i+3 < l && U16.validate2 x1 x2 = Yield (U16.chr2 x1 x2) (i+4)
-          | otherwise = error $ "bsStream: bad UTF16-BE stream "
-          where
-            x1    = (shiftL (idx i) 8) + (idx (i + 1))
-            x2    = (shiftL (idx (i + 2)) 8) + (idx (i + 3))
-            idx = fromIntegral . B.unsafeIndex bs :: Int -> Word16
-stream_bs Utf32BE bs = Stream next 0 l
-    where
-      l = B.length bs
-      {-# INLINE next #-}
-      next i
-          | i >= l                    = Done
-          | i+3 < l && U32.validate x = Yield (unsafeChr32 x) (i+4)
-          | otherwise                 = error "bsStream: bad UTF-32BE stream"
-          where
-            x     = shiftL x1 24 + shiftL x2 16 + shiftL x3 8 + x4
-            x1    = idx i
-            x2    = idx (i+1)
-            x3    = idx (i+2)
-            x4    = idx (i+3)
-            idx = fromIntegral . B.unsafeIndex bs :: Int -> Word32
-stream_bs Utf32LE bs = Stream next 0 l
-    where
-      l = B.length bs
-      {-# INLINE next #-}
-      next i
-          | i >= l                    = Done
-          | i+3 < l && U32.validate x = Yield (unsafeChr32 x) (i+4)
-          | otherwise                 = error "bsStream: bad UTF-32LE stream"
-          where
-            x     = shiftL x4 24 + shiftL x3 16 + shiftL x2 8 + x1
-            x1    = idx i
-            x2    = idx $ i+1
-            x3    = idx $ i+2
-            x4    = idx $ i+3
-            idx = fromIntegral . B.unsafeIndex bs :: Int -> Word32
-{-# INLINE [0] stream_bs #-}
-
 internalError :: String -> a
 internalError func = error $ "Data.Text.Fusion." ++ func ++ ": internal error"
 
--- | /O(n)/ Convert a Stream Char into a Stream Word8 using the specified encoding standard.
-restream :: Encoding -> Stream Char -> Stream Word8
-restream ASCII (Stream next0 s0 len) =  Stream next s0 (len*2)
-    where
-      next !s = case next0 s of
-                  Done -> Done
-                  Skip s' -> Skip s'
-                  Yield x xs -> Yield x' xs
-                      where x' = fromIntegral (ord x) :: Word8
-restream Utf8 (Stream next0 s0 len) =
-    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
-    where
-      {-# INLINE next #-}
-      next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
-                  Done              -> Done
-                  Skip s'           -> Skip (T4 (Just s') Nothing Nothing Nothing)
-                  Yield x xs
-                      | n <= 0x7F   -> Yield c         (T4 (Just xs) Nothing Nothing Nothing)
-                      | n <= 0x07FF -> Yield (fst c2)  (T4 (Just xs) (Just $ snd c2) Nothing Nothing)
-                      | n <= 0xFFFF -> Yield (fst3 c3) (T4 (Just xs) (Just $ snd3 c3) (Just $ trd3 c3) Nothing)
-                      | otherwise   -> Yield (fst4 c4) (T4 (Just xs) (Just $ snd4 c4) (Just $ trd4 c4) (Just $ fth4 c4))
-                      where
-                        n  = ord x
-                        c  = fromIntegral n
-                        c2 = U8.ord2 x
-                        c3 = U8.ord3 x
-                        c4 = U8.ord4 x
-                        fst3 (x1,_,_)   = x1
-                        snd3 (_,x2,_)   = x2
-                        trd3 (_,_,x3)   = x3
-                        fst4 (x1,_,_,_) = x1
-                        snd4 (_,x2,_,_) = x2
-                        trd4 (_,_,x3,_) = x3
-                        fth4 (_,_,_,x4) = x4
-      next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
-      next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
-      next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
-      next _ = internalError "restream Utf8"
-restream Utf16BE (Stream next0 s0 len) =
-    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
-    where
-      {-# INLINE next #-}
-      next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
-          Done -> Done
-          Skip s' -> Skip (T4 (Just s') Nothing Nothing Nothing)
-          Yield x xs
-              | n < 0x10000 -> Yield (fromIntegral $ shiftR n 8) (T4 (Just xs) (Just (fromIntegral n)) Nothing Nothing)
-              | otherwise   -> Yield c1                          (T4 (Just xs) (Just c2) (Just c3) (Just c4))
-              where
-                n  = ord x
-                n1 = n - 0x10000
-                c1 = fromIntegral (shiftR n1 18 + 0xD8)
-                c2 = fromIntegral (shiftR n1 10)
-                n2 = n1 .&. 0x3FF
-                c3 = fromIntegral (shiftR n2 8 + 0xDC)
-                c4 = fromIntegral n2
-      next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
-      next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
-      next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
-      next _ = internalError "restream Utf16BE"
-restream Utf16LE (Stream next0 s0 len) =
-    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
-    where
-      {-# INLINE next #-}
-      next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
-          Done -> Done
-          Skip s' -> Skip (T4 (Just s') Nothing Nothing Nothing)
-          Yield x xs
-              | n < 0x10000 -> Yield (fromIntegral n) (T4 (Just xs) (Just (fromIntegral $ shiftR n 8)) Nothing Nothing)
-              | otherwise   -> Yield c1                          (T4 (Just xs) (Just c2) (Just c3) (Just c4))
-              where
-                n  = ord x
-                n1 = n - 0x10000
-                c2 = fromIntegral (shiftR n1 18 + 0xD8)
-                c1 = fromIntegral (shiftR n1 10)
-                n2 = n1 .&. 0x3FF
-                c4 = fromIntegral (shiftR n2 8 + 0xDC)
-                c3 = fromIntegral n2
-      next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
-      next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
-      next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
-      next _ = internalError "restream Utf16LE"
-restream Utf32BE (Stream next0 s0 len) =
-    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
-    where
-    {-# INLINE next #-}
-    next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
-        Done       -> Done
-        Skip s'    -> Skip (T4 (Just s') Nothing Nothing Nothing)
-        Yield x xs -> Yield c1 (T4 (Just xs) (Just c2) (Just c3) (Just c4))
-          where
-            n  = ord x
-            c1 = fromIntegral $ shiftR n 24
-            c2 = fromIntegral $ shiftR n 16
-            c3 = fromIntegral $ shiftR n 8
-            c4 = fromIntegral n
-    next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
-    next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
-    next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
-    next _ = internalError "restream Utf32BE"
-restream Utf32LE (Stream next0 s0 len) =
-    Stream next (T4 (Just s0) Nothing Nothing Nothing) (len*2)
-    where
-    {-# INLINE next #-}
-    next (T4 (Just s) Nothing Nothing Nothing) = case next0 s of
-        Done       -> Done
-        Skip s'    -> Skip (T4 (Just s') Nothing Nothing Nothing)
-        Yield x xs -> Yield c1 (T4 (Just xs) (Just c2) (Just c3) (Just c4))
-          where
-            n  = ord x
-            c4 = fromIntegral $ shiftR n 24
-            c3 = fromIntegral $ shiftR n 16
-            c2 = fromIntegral $ shiftR n 8
-            c1 = fromIntegral n
-    next (T4 (Just s) (Just x2) Nothing Nothing) = Yield x2 (T4 (Just s) Nothing Nothing Nothing)
-    next (T4 (Just s) (Just x2) x3 Nothing)      = Yield x2 (T4 (Just s) x3 Nothing Nothing)
-    next (T4 (Just s) (Just x2) x3 x4)           = Yield x2 (T4 (Just s) x3 x4 Nothing)
-    next _ = internalError "restream Utf32LE"
-{-# INLINE restream #-}
-
-
--- | /O(n)/ Convert a Stream Word8 to a ByteString
-unstream_bs :: Stream Word8 -> ByteString
-unstream_bs (Stream next s0 len) = unsafePerformIO $ do
-    fp0 <- mallocByteString len
-    loop fp0 len 0 s0
-    where
-      loop !fp !n !off !s = case next s of
-          Done -> trimUp fp n off
-          Skip s' -> loop fp n off s'
-          Yield x s'
-              | n == off -> realloc fp n off s' x
-              | otherwise -> do
-            withForeignPtr fp $ \p -> pokeByteOff p off x
-            loop fp n (off+1) s'
-      {-# NOINLINE realloc #-}
-      realloc fp n off s x = do
-        let n' = n+n
-        fp' <- copy0 fp n n'
-        withForeignPtr fp' $ \p -> pokeByteOff p off x
-        loop fp' n' (off+1) s
-      {-# NOINLINE trimUp #-}
-      trimUp fp _ off = return $! PS fp 0 off
-      copy0 :: ForeignPtr Word8 -> Int -> Int -> IO (ForeignPtr Word8)
-      copy0 !src !srcLen !destLen = assert (srcLen <= destLen) $ do
-          dest <- mallocByteString destLen
-          withForeignPtr src  $ \src'  ->
-              withForeignPtr dest $ \dest' ->
-                  memcpy dest' src' (fromIntegral destLen)
-          return dest
-{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}
-
 -- ----------------------------------------------------------------------------
 -- * Basic stream functions
 

File tests/Bench.hs

 import Control.Exception
 
 import qualified Data.Text as T
+import qualified Data.Text.Encoding as E
 import Data.Text.Internal
 import qualified Data.Text.Fusion as S
-import Data.Text.Fusion (Encoding(..))
 
 import qualified Data.List as L
 import qualified Data.ByteString.Char8 as B
 import qualified System.IO.UTF8 as UTF8
 
 main = do ascii_bs <- B.readFile "text/test/ascii.txt"
-          let ascii_txt = T.decode ASCII ascii_bs 
+          let ascii_txt = E.decodeASCII ascii_bs 
           let ascii_str = T.unpack ascii_txt
           force (ascii_txt,ascii_str,ascii_bs)
           printf " # Text\t\tString\tByteString\n"
   exposed-modules:
     Data.Text
     Data.Text.Array
+    Data.Text.Encoding
+    Data.Text.Encoding.Fusion
     Data.Text.UnsafeChar
     Data.Text.Internal
     Data.Text.Fusion