Bryan O'Sullivan avatar Bryan O'Sullivan committed d020a03

Factor encoding restreaming functions into another module

Comments (0)

Files changed (3)

Data/Text/Encoding/Fusion.hs

 {-# LANGUAGE BangPatterns #-}
 
 -- |
--- Module      : Data.Text.Encoding
+-- Module      : Data.Text.Encoding.Fusion
 -- Copyright   : (c) Tom Harper 2008-2009,
 --               (c) Bryan O'Sullivan 2009,
 --               (c) Duncan Coutts 2009
     -- * Unstreaming
     , unstream
 
-    -- * Restreaming
-    -- Restreaming is the act of converting from one 'Stream'
-    -- representation to another.
-    , restreamUtf8
-    , restreamUtf16LE
-    , restreamUtf16BE
-    , restreamUtf32LE
-    , restreamUtf32BE
+    , module Data.Text.Encoding.Fusion.Common
     ) where
 
 import Control.Exception (assert)
-import Data.Bits (shiftL, shiftR, (.&.))
+import Data.Bits (shiftL)
 import Data.ByteString as B
 import Data.ByteString.Internal (ByteString(..), mallocByteString, memcpy)
-import Data.Char (ord)
 import Data.Text.Fusion (Step(..), Stream(..))
-import Data.Text.Fusion.Internal (M(..))
+import Data.Text.Encoding.Fusion.Common
 import Data.Text.UnsafeChar (unsafeChr, unsafeChr8, unsafeChr32)
 import Data.Word (Word8, Word16, Word32)
 import Foreign.ForeignPtr (withForeignPtr, ForeignPtr)
 import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Data.Text.Encoding.Utf32 as U32
 
-type M8 = M Word8
-
--- Restreaming state.
-data S s = S {-# UNPACK #-} !s
-    {-# UNPACK #-} !M8 {-# UNPACK #-} !M8 {-# UNPACK #-} !M8
-
 streamASCII :: ByteString -> Stream Char
 streamASCII bs = Stream next 0 l
     where
             idx = fromIntegral . B.unsafeIndex bs :: Int -> Word32
 {-# INLINE [0] streamUtf32LE #-}
 
--- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
-restreamUtf8 :: Stream Char -> Stream Word8
-restreamUtf8 (Stream next0 s0 len) =
-    Stream next (S s0 N N N) (len*2)
-    where
-      {-# INLINE next #-}
-      next (S s N N N) = case next0 s of
-                  Done              -> Done
-                  Skip s'           -> Skip (S s' N N N)
-                  Yield x xs
-                      | n <= 0x7F   -> Yield c  (S xs N N N)
-                      | n <= 0x07FF -> Yield a2 (S xs (J b2) N N)
-                      | n <= 0xFFFF -> Yield a3 (S xs (J b3) (J c3) N)
-                      | otherwise   -> Yield a4 (S xs (J b4) (J c4) (J d4))
-                      where
-                        n  = ord x
-                        c  = fromIntegral n
-                        (a2,b2) = U8.ord2 x
-                        (a3,b3,c3) = U8.ord3 x
-                        (a4,b4,c4,d4) = U8.ord4 x
-      next (S s (J x2) N N)   = Yield x2 (S s N N N)
-      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
-      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
-      next _ = internalError "restreamUtf8"
-{-# INLINE restreamUtf8 #-}
-
-restreamUtf16BE :: Stream Char -> Stream Word8
-restreamUtf16BE (Stream next0 s0 len) =
-    Stream next (S s0 N N N) (len*2)
-    where
-      {-# INLINE next #-}
-      next (S s N N N) = case next0 s of
-          Done -> Done
-          Skip s' -> Skip (S s' N N N)
-          Yield x xs
-              | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
-                               S xs (J $ fromIntegral n) N N
-              | otherwise   -> Yield c1 $
-                               S xs (J c2) (J c3) (J c4)
-              where
-                n  = ord x
-                n1 = n - 0x10000
-                c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
-                c2 = fromIntegral (n1 `shiftR` 10)
-                n2 = n1 .&. 0x3FF
-                c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
-                c4 = fromIntegral n2
-      next (S s (J x2) N N)   = Yield x2 (S s N N N)
-      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
-      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
-      next _ = internalError "restreamUtf16BE"
-{-# INLINE restreamUtf16BE #-}
-
-restreamUtf16LE :: Stream Char -> Stream Word8
-restreamUtf16LE (Stream next0 s0 len) =
-    Stream next (S s0 N N N) (len*2)
-    where
-      {-# INLINE next #-}
-      next (S s N N N) = case next0 s of
-          Done -> Done
-          Skip s' -> Skip (S s' N N N)
-          Yield x xs
-              | n < 0x10000 -> Yield (fromIntegral n) $
-                               S xs (J (fromIntegral $ shiftR n 8)) N N
-              | otherwise   -> Yield c1 $
-                               S xs (J c2) (J c3) (J c4)
-              where
-                n  = ord x
-                n1 = n - 0x10000
-                c2 = fromIntegral (shiftR n1 18 + 0xD8)
-                c1 = fromIntegral (shiftR n1 10)
-                n2 = n1 .&. 0x3FF
-                c4 = fromIntegral (shiftR n2 8 + 0xDC)
-                c3 = fromIntegral n2
-      next (S s (J x2) N N)   = Yield x2 (S s N N N)
-      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
-      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
-      next _ = internalError "restreamUtf16LE"
-{-# INLINE restreamUtf16LE #-}
-
-restreamUtf32BE :: Stream Char -> Stream Word8
-restreamUtf32BE (Stream next0 s0 len) =
-    Stream next (S s0 N N N) (len*2)
-    where
-    {-# INLINE next #-}
-    next (S s N N N) = case next0 s of
-        Done       -> Done
-        Skip s'    -> Skip (S s' N N N)
-        Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
-          where
-            n  = ord x
-            c1 = fromIntegral $ shiftR n 24
-            c2 = fromIntegral $ shiftR n 16
-            c3 = fromIntegral $ shiftR n 8
-            c4 = fromIntegral n
-    next (S s (J x2) N N) = Yield x2 (S s N N N)
-    next (S s (J x2) x3 N)      = Yield x2 (S s x3 N N)
-    next (S s (J x2) x3 x4)           = Yield x2 (S s x3 x4 N)
-    next _ = internalError "restreamUtf32BE"
-{-# INLINE restreamUtf32BE #-}
-
-restreamUtf32LE :: Stream Char -> Stream Word8
-restreamUtf32LE (Stream next0 s0 len) =
-    Stream next (S s0 N N N) (len*2)
-    where
-    {-# INLINE next #-}
-    next (S s N N N) = case next0 s of
-        Done       -> Done
-        Skip s'    -> Skip (S s' N N N)
-        Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
-          where
-            n  = ord x
-            c4 = fromIntegral $ shiftR n 24
-            c3 = fromIntegral $ shiftR n 16
-            c2 = fromIntegral $ shiftR n 8
-            c1 = fromIntegral n
-    next (S s (J x2) N N)   = Yield x2 (S s N N N)
-    next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
-    next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
-    next _ = internalError "restreamUtf32LE"
-{-# INLINE restreamUtf32LE #-}
-
-
 -- | /O(n)/ Convert a 'Stream' 'Word8' to a 'ByteString'.
 unstream :: Stream Word8 -> ByteString
 unstream (Stream next s0 len) = unsafePerformIO $ do
                   memcpy dest' src' (fromIntegral destLen)
           return dest
 
-internalError :: String -> a
-internalError func =
-    error $ "Data.Text.Encoding.Fusion." ++ func ++ ": internal error"
-
 encodingError :: String -> a
 encodingError encoding =
     error $ "Data.Text.Encoding.Fusion: Bad " ++ encoding ++ " stream"

Data/Text/Encoding/Fusion/Common.hs

+{-# LANGUAGE BangPatterns #-}
+
+-- |
+-- Module      : Data.Text.Encoding.Fusion.Common
+-- Copyright   : (c) Tom Harper 2008-2009,
+--               (c) Bryan O'Sullivan 2009,
+--               (c) Duncan Coutts 2009
+--
+-- License     : BSD-style
+-- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
+--               duncan@haskell.org
+-- Stability   : experimental
+-- Portability : portable
+--
+-- Fusible 'Stream'-oriented functions for converting between 'Text'
+-- and several common encodings.
+
+module Data.Text.Encoding.Fusion.Common
+    (
+    -- * Restreaming
+    -- Restreaming is the act of converting from one 'Stream'
+    -- representation to another.
+      restreamUtf8
+    , restreamUtf16LE
+    , restreamUtf16BE
+    , restreamUtf32LE
+    , restreamUtf32BE
+    ) where
+
+import Data.Bits (shiftL, shiftR, (.&.))
+import Data.Char (ord)
+import Data.Text.Fusion (Step(..), Stream(..))
+import Data.Text.Fusion.Internal (M(..))
+import Data.Word (Word8)
+import qualified Data.Text.Encoding.Utf8 as U8
+
+type M8 = M Word8
+
+-- Restreaming state.
+data S s = S {-# UNPACK #-} !s
+    {-# UNPACK #-} !M8 {-# UNPACK #-} !M8 {-# UNPACK #-} !M8
+
+-- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
+restreamUtf8 :: Stream Char -> Stream Word8
+restreamUtf8 (Stream next0 s0 len) =
+    Stream next (S s0 N N N) (len*2)
+    where
+      {-# INLINE next #-}
+      next (S s N N N) = case next0 s of
+                  Done              -> Done
+                  Skip s'           -> Skip (S s' N N N)
+                  Yield x xs
+                      | n <= 0x7F   -> Yield c  (S xs N N N)
+                      | n <= 0x07FF -> Yield a2 (S xs (J b2) N N)
+                      | n <= 0xFFFF -> Yield a3 (S xs (J b3) (J c3) N)
+                      | otherwise   -> Yield a4 (S xs (J b4) (J c4) (J d4))
+                      where
+                        n  = ord x
+                        c  = fromIntegral n
+                        (a2,b2) = U8.ord2 x
+                        (a3,b3,c3) = U8.ord3 x
+                        (a4,b4,c4,d4) = U8.ord4 x
+      next (S s (J x2) N N)   = Yield x2 (S s N N N)
+      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
+      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
+      next _ = internalError "restreamUtf8"
+{-# INLINE restreamUtf8 #-}
+
+restreamUtf16BE :: Stream Char -> Stream Word8
+restreamUtf16BE (Stream next0 s0 len) =
+    Stream next (S s0 N N N) (len*2)
+    where
+      {-# INLINE next #-}
+      next (S s N N N) = case next0 s of
+          Done -> Done
+          Skip s' -> Skip (S s' N N N)
+          Yield x xs
+              | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
+                               S xs (J $ fromIntegral n) N N
+              | otherwise   -> Yield c1 $
+                               S xs (J c2) (J c3) (J c4)
+              where
+                n  = ord x
+                n1 = n - 0x10000
+                c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
+                c2 = fromIntegral (n1 `shiftR` 10)
+                n2 = n1 .&. 0x3FF
+                c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
+                c4 = fromIntegral n2
+      next (S s (J x2) N N)   = Yield x2 (S s N N N)
+      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
+      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
+      next _ = internalError "restreamUtf16BE"
+{-# INLINE restreamUtf16BE #-}
+
+restreamUtf16LE :: Stream Char -> Stream Word8
+restreamUtf16LE (Stream next0 s0 len) =
+    Stream next (S s0 N N N) (len*2)
+    where
+      {-# INLINE next #-}
+      next (S s N N N) = case next0 s of
+          Done -> Done
+          Skip s' -> Skip (S s' N N N)
+          Yield x xs
+              | n < 0x10000 -> Yield (fromIntegral n) $
+                               S xs (J (fromIntegral $ shiftR n 8)) N N
+              | otherwise   -> Yield c1 $
+                               S xs (J c2) (J c3) (J c4)
+              where
+                n  = ord x
+                n1 = n - 0x10000
+                c2 = fromIntegral (shiftR n1 18 + 0xD8)
+                c1 = fromIntegral (shiftR n1 10)
+                n2 = n1 .&. 0x3FF
+                c4 = fromIntegral (shiftR n2 8 + 0xDC)
+                c3 = fromIntegral n2
+      next (S s (J x2) N N)   = Yield x2 (S s N N N)
+      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
+      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
+      next _ = internalError "restreamUtf16LE"
+{-# INLINE restreamUtf16LE #-}
+
+restreamUtf32BE :: Stream Char -> Stream Word8
+restreamUtf32BE (Stream next0 s0 len) =
+    Stream next (S s0 N N N) (len*2)
+    where
+    {-# INLINE next #-}
+    next (S s N N N) = case next0 s of
+        Done       -> Done
+        Skip s'    -> Skip (S s' N N N)
+        Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
+          where
+            n  = ord x
+            c1 = fromIntegral $ shiftR n 24
+            c2 = fromIntegral $ shiftR n 16
+            c3 = fromIntegral $ shiftR n 8
+            c4 = fromIntegral n
+    next (S s (J x2) N N) = Yield x2 (S s N N N)
+    next (S s (J x2) x3 N)      = Yield x2 (S s x3 N N)
+    next (S s (J x2) x3 x4)           = Yield x2 (S s x3 x4 N)
+    next _ = internalError "restreamUtf32BE"
+{-# INLINE restreamUtf32BE #-}
+
+restreamUtf32LE :: Stream Char -> Stream Word8
+restreamUtf32LE (Stream next0 s0 len) =
+    Stream next (S s0 N N N) (len*2)
+    where
+    {-# INLINE next #-}
+    next (S s N N N) = case next0 s of
+        Done       -> Done
+        Skip s'    -> Skip (S s' N N N)
+        Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
+          where
+            n  = ord x
+            c4 = fromIntegral $ shiftR n 24
+            c3 = fromIntegral $ shiftR n 16
+            c2 = fromIntegral $ shiftR n 8
+            c1 = fromIntegral n
+    next (S s (J x2) N N)   = Yield x2 (S s N N N)
+    next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
+    next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
+    next _ = internalError "restreamUtf32LE"
+{-# INLINE restreamUtf32LE #-}
+
+internalError :: String -> a
+internalError func =
+    error $ "Data.Text.Encoding.Fusion.Common." ++ func ++ ": internal error"
     Data.Text
     Data.Text.Encoding
     Data.Text.Encoding.Fusion
+    Data.Text.Encoding.Fusion.Common
     Data.Text.Foreign
     Data.Text.Fusion
     Data.Text.Fusion.Common
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.