Source

text / Data / Text / Encoding / Fusion / Common.hs

Full commit
{-# LANGUAGE BangPatterns #-}

-- |
-- Module      : Data.Text.Encoding.Fusion.Common
-- Copyright   : (c) Tom Harper 2008-2009,
--               (c) Bryan O'Sullivan 2009,
--               (c) Duncan Coutts 2009,
--               (c) Jasper Van der Jeugt 2011
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : portable
--
-- Fusible 'Stream'-oriented functions for converting between 'Text'
-- and several common encodings.

module Data.Text.Encoding.Fusion.Common
    (
    -- * Restreaming
    -- Restreaming is the act of converting from one 'Stream'
    -- representation to another.
      restreamUtf8
    , restreamUtf16LE
    , restreamUtf16BE
    , restreamUtf32LE
    , restreamUtf32BE
    ) where

import Data.Bits ((.&.))
import Data.Text.Fusion (Step(..), Stream(..))
import Data.Text.Fusion.Internal (RS(..))
import Data.Text.UnsafeChar (ord)
import Data.Text.UnsafeShift (shiftR)
import Data.Word (Word8)
import qualified Data.Text.Encoding.Utf8 as U8

-- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
restreamUtf8 :: Stream Char -> Stream Word8
restreamUtf8 (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
  where
    next (RS0 s) = case next0 s of
        Done              -> Done
        Skip s'           -> Skip (RS0 s')
        Yield x s'
            | n <= 0x7F   -> Yield c  (RS0 s')
            | n <= 0x07FF -> Yield a2 (RS1 s' b2)
            | n <= 0xFFFF -> Yield a3 (RS2 s' b3 c3)
            | otherwise   -> Yield a4 (RS3 s' b4 c4 d4)
          where
            n  = ord x
            c  = fromIntegral n
            (a2,b2) = U8.ord2 x
            (a3,b3,c3) = U8.ord3 x
            (a4,b4,c4,d4) = U8.ord4 x
    next (RS1 s x2)       = Yield x2 (RS0 s)
    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
    {-# INLINE next #-}
{-# INLINE restreamUtf8 #-}

restreamUtf16BE :: Stream Char -> Stream Word8
restreamUtf16BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
  where
    next (RS0 s) = case next0 s of
        Done -> Done
        Skip s' -> Skip (RS0 s')
        Yield x s'
            | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
                             RS1 s' (fromIntegral n)
            | otherwise   -> Yield c1 $ RS3 s' c2 c3 c4
            where
              n  = ord x
              n1 = n - 0x10000
              c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
              c2 = fromIntegral (n1 `shiftR` 10)
              n2 = n1 .&. 0x3FF
              c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
              c4 = fromIntegral n2
    next (RS1 s x2)       = Yield x2 (RS0 s)
    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
    {-# INLINE next #-}
{-# INLINE restreamUtf16BE #-}

restreamUtf16LE :: Stream Char -> Stream Word8
restreamUtf16LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
  where
    next (RS0 s) = case next0 s of
        Done -> Done
        Skip s' -> Skip (RS0 s')
        Yield x s'
            | n < 0x10000 -> Yield (fromIntegral n) $
                             RS1 s' (fromIntegral $ shiftR n 8)
            | otherwise   -> Yield c1 $ RS3 s' c2 c3 c4
          where
            n  = ord x
            n1 = n - 0x10000
            c2 = fromIntegral (shiftR n1 18 + 0xD8)
            c1 = fromIntegral (shiftR n1 10)
            n2 = n1 .&. 0x3FF
            c4 = fromIntegral (shiftR n2 8 + 0xDC)
            c3 = fromIntegral n2
    next (RS1 s x2)       = Yield x2 (RS0 s)
    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
    {-# INLINE next #-}
{-# INLINE restreamUtf16LE #-}

restreamUtf32BE :: Stream Char -> Stream Word8
restreamUtf32BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
  where
    next (RS0 s) = case next0 s of
        Done       -> Done
        Skip s'    -> Skip (RS0 s')
        Yield x s' -> Yield c1 (RS3 s' c2 c3 c4)
          where
            n  = ord x
            c1 = fromIntegral $ shiftR n 24
            c2 = fromIntegral $ shiftR n 16
            c3 = fromIntegral $ shiftR n 8
            c4 = fromIntegral n
    next (RS1 s x2)       = Yield x2 (RS0 s)
    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
    {-# INLINE next #-}
{-# INLINE restreamUtf32BE #-}

restreamUtf32LE :: Stream Char -> Stream Word8
restreamUtf32LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
  where
    next (RS0 s) = case next0 s of
        Done       -> Done
        Skip s'    -> Skip (RS0 s')
        Yield x s' -> Yield c1 (RS3 s' c2 c3 c4)
          where
            n  = ord x
            c4 = fromIntegral $ shiftR n 24
            c3 = fromIntegral $ shiftR n 16
            c2 = fromIntegral $ shiftR n 8
            c1 = fromIntegral n
    next (RS1 s x2)       = Yield x2 (RS0 s)
    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
    {-# INLINE next #-}
{-# INLINE restreamUtf32LE #-}