text / Data / Text / Encoding / Fusion / Common.hs

The default branch has multiple heads

{-# LANGUAGE BangPatterns #-}

-- |
-- Module      : Data.Text.Encoding.Fusion.Common
-- Copyright   : (c) Tom Harper 2008-2009,
--               (c) Bryan O'Sullivan 2009,
--               (c) Duncan Coutts 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : portable
--
-- Fusible 'Stream'-oriented functions for converting between 'Text'
-- and several common encodings.

module Data.Text.Encoding.Fusion.Common
    (
    -- * Restreaming
    -- Restreaming is the act of converting from one 'Stream'
    -- representation to another.
      restreamUtf8
    , restreamUtf16LE
    , restreamUtf16BE
    , restreamUtf32LE
    , restreamUtf32BE
    ) where

import Data.Bits ((.&.))
import Data.Text.Fusion (Step(..), Stream(..))
import Data.Text.Fusion.Internal (M(..), S(..))
import Data.Text.UnsafeChar (ord)
import Data.Text.UnsafeShift (shiftR)
import Data.Word (Word8)
import qualified Data.Text.Encoding.Utf8 as U8

-- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
restreamUtf8 :: Stream Char -> Stream Word8
restreamUtf8 (Stream next0 s0 len) =
    Stream next (S s0 N N N) (len*2)
    where
      {-# INLINE next #-}
      next (S s N N N) = case next0 s of
                  Done              -> Done
                  Skip s'           -> Skip (S s' N N N)
                  Yield x xs
                      | n <= 0x7F   -> Yield c  (S xs N N N)
                      | n <= 0x07FF -> Yield a2 (S xs (J b2) N N)
                      | n <= 0xFFFF -> Yield a3 (S xs (J b3) (J c3) N)
                      | otherwise   -> Yield a4 (S xs (J b4) (J c4) (J d4))
                      where
                        n  = ord x
                        c  = fromIntegral n
                        (a2,b2) = U8.ord2 x
                        (a3,b3,c3) = U8.ord3 x
                        (a4,b4,c4,d4) = U8.ord4 x
      next (S s (J x2) N N)   = Yield x2 (S s N N N)
      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
      next _ = internalError "restreamUtf8"
{-# INLINE restreamUtf8 #-}

restreamUtf16BE :: Stream Char -> Stream Word8
restreamUtf16BE (Stream next0 s0 len) =
    Stream next (S s0 N N N) (len*2)
    where
      {-# INLINE next #-}
      next (S s N N N) = case next0 s of
          Done -> Done
          Skip s' -> Skip (S s' N N N)
          Yield x xs
              | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
                               S xs (J $ fromIntegral n) N N
              | otherwise   -> Yield c1 $
                               S xs (J c2) (J c3) (J c4)
              where
                n  = ord x
                n1 = n - 0x10000
                c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
                c2 = fromIntegral (n1 `shiftR` 10)
                n2 = n1 .&. 0x3FF
                c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
                c4 = fromIntegral n2
      next (S s (J x2) N N)   = Yield x2 (S s N N N)
      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
      next _ = internalError "restreamUtf16BE"
{-# INLINE restreamUtf16BE #-}

restreamUtf16LE :: Stream Char -> Stream Word8
restreamUtf16LE (Stream next0 s0 len) =
    Stream next (S s0 N N N) (len*2)
    where
      {-# INLINE next #-}
      next (S s N N N) = case next0 s of
          Done -> Done
          Skip s' -> Skip (S s' N N N)
          Yield x xs
              | n < 0x10000 -> Yield (fromIntegral n) $
                               S xs (J (fromIntegral $ shiftR n 8)) N N
              | otherwise   -> Yield c1 $
                               S xs (J c2) (J c3) (J c4)
              where
                n  = ord x
                n1 = n - 0x10000
                c2 = fromIntegral (shiftR n1 18 + 0xD8)
                c1 = fromIntegral (shiftR n1 10)
                n2 = n1 .&. 0x3FF
                c4 = fromIntegral (shiftR n2 8 + 0xDC)
                c3 = fromIntegral n2
      next (S s (J x2) N N)   = Yield x2 (S s N N N)
      next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
      next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
      next _ = internalError "restreamUtf16LE"
{-# INLINE restreamUtf16LE #-}

restreamUtf32BE :: Stream Char -> Stream Word8
restreamUtf32BE (Stream next0 s0 len) =
    Stream next (S s0 N N N) (len*2)
    where
    {-# INLINE next #-}
    next (S s N N N) = case next0 s of
        Done       -> Done
        Skip s'    -> Skip (S s' N N N)
        Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
          where
            n  = ord x
            c1 = fromIntegral $ shiftR n 24
            c2 = fromIntegral $ shiftR n 16
            c3 = fromIntegral $ shiftR n 8
            c4 = fromIntegral n
    next (S s (J x2) N N) = Yield x2 (S s N N N)
    next (S s (J x2) x3 N)      = Yield x2 (S s x3 N N)
    next (S s (J x2) x3 x4)           = Yield x2 (S s x3 x4 N)
    next _ = internalError "restreamUtf32BE"
{-# INLINE restreamUtf32BE #-}

restreamUtf32LE :: Stream Char -> Stream Word8
restreamUtf32LE (Stream next0 s0 len) =
    Stream next (S s0 N N N) (len*2)
    where
    {-# INLINE next #-}
    next (S s N N N) = case next0 s of
        Done       -> Done
        Skip s'    -> Skip (S s' N N N)
        Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
          where
            n  = ord x
            c4 = fromIntegral $ shiftR n 24
            c3 = fromIntegral $ shiftR n 16
            c2 = fromIntegral $ shiftR n 8
            c1 = fromIntegral n
    next (S s (J x2) N N)   = Yield x2 (S s N N N)
    next (S s (J x2) x3 N)  = Yield x2 (S s x3 N N)
    next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
    next _ = internalError "restreamUtf32LE"
{-# INLINE restreamUtf32LE #-}

internalError :: String -> a
internalError func =
    error $ "Data.Text.Encoding.Fusion.Common." ++ func ++ ": internal error"
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.