Bryan O'Sullivan avatar Bryan O'Sullivan committed 1eaf080

Move Utf* modules into Data.Text.Encoding

Comments (0)

Files changed (12)

 import qualified Prelude as P
 import Data.Text.Unsafe (iter, iter_, unsafeHead, unsafeTail)
 import Data.Text.UnsafeChar (unsafeChr)
-import qualified Data.Text.Utf16 as U16
+import qualified Data.Text.Encoding.Utf16 as U16
 
 -- $fusion
 --

Data/Text/Encoding.hs

     , decodeUtf32BE
 
     -- * Encoding Text to ByteStrings
-    , encodeASCII
     , encodeUtf8
     , encodeUtf16LE
     , encodeUtf16BE
 decodeUtf8 bs = F.unstream (E.streamUtf8 bs)
 {-# INLINE decodeUtf8 #-}
 
--- | Encode text using a 7-bit ASCII representation. /Note/: non-ASCII
--- characters in the input 'Text' will be /truncated/.
-encodeASCII :: Text -> ByteString
-encodeASCII txt = E.unstream (E.restreamASCII (F.stream txt))
-{-# INLINE encodeASCII #-}
-
 -- | Encode text using UTF-8 encoding.
 encodeUtf8 :: Text -> ByteString
 encodeUtf8 txt = E.unstream (E.restreamUtf8 (F.stream txt))

Data/Text/Encoding/Fusion.hs

     -- * Restreaming
     -- Restreaming is the act of converting from one 'Stream'
     -- representation to another.
-    , restreamASCII
     , restreamUtf8
     , restreamUtf16LE
     , restreamUtf16BE
 import System.IO.Unsafe (unsafePerformIO)
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Unsafe as B
-import qualified Data.Text.Utf16 as U16
-import qualified Data.Text.Utf32 as U32
-import qualified Data.Text.Utf8 as U8
+import qualified Data.Text.Encoding.Utf8 as U8
+import qualified Data.Text.Encoding.Utf16 as U16
+import qualified Data.Text.Encoding.Utf32 as U32
 
 -- Specialised, strict Maybe-like type.
 data M = N
             idx = fromIntegral . B.unsafeIndex bs :: Int -> Word32
 {-# INLINE [0] streamUtf32LE #-}
 
-restreamASCII :: Stream Char -> Stream Word8
-restreamASCII (Stream next0 s0 len) =  Stream next s0 (len*2)
-    where
-      next !s = case next0 s of
-                  Done -> Done
-                  Skip s' -> Skip s'
-                  Yield x xs -> Yield x' xs
-                      where x' = fromIntegral (ord x) :: Word8
-{-# INLINE restreamASCII #-}
-
 -- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
 restreamUtf8 :: Stream Char -> Stream Word8
 restreamUtf8 (Stream next0 s0 len) =

Data/Text/Encoding/Utf16.hs

+{-# LANGUAGE MagicHash #-}
+
+-- |
+-- Module      : Data.Text.Encoding.Utf16
+-- Copyright   : (c) Tom Harper 2008-2009,
+--               (c) Bryan O'Sullivan 2009,
+--               (c) Duncan Coutts 2009
+--
+-- License     : BSD-style
+-- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
+--               duncan@haskell.org
+-- Stability   : experimental
+-- Portability : GHC
+--
+-- Basic UTF-16 validation and character manipulation.
+module Data.Text.Encoding.Utf16
+    (
+      chr2
+    , validate1
+    , validate2
+    ) where
+
+import GHC.Exts
+import GHC.Word (Word16(..))
+
+chr2 :: Word16 -> Word16 -> Char
+chr2 (W16# a#) (W16# b#) = C# (chr# (upper# +# lower# +# 0x10000#))
+    where
+      x# = word2Int# a#
+      y# = word2Int# b#
+      upper# = uncheckedIShiftL# (x# -# 0xD800#) 10#
+      lower# = y# -# 0xDC00#
+{-# INLINE chr2 #-}
+
+validate1    :: Word16 -> Bool
+validate1 x1 = (x1 >= 0 && x1 < 0xD800) || x1 > 0xDFFF
+{-# INLINE validate1 #-}
+
+validate2       ::  Word16 -> Word16 -> Bool
+validate2 x1 x2 = x1 >= 0xD800 && x1 <= 0xDBFF &&
+                  x2 >= 0xDC00 && x2 <= 0xDFFF
+{-# INLINE validate2 #-}

Data/Text/Encoding/Utf32.hs

+-- |
+-- Module      : Data.Text.Encoding.Utf32
+-- Copyright   : (c) Tom Harper 2008-2009,
+--               (c) Bryan O'Sullivan 2009,
+--               (c) Duncan Coutts 2009
+--
+-- License     : BSD-style
+-- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
+--               duncan@haskell.org
+-- Stability   : experimental
+-- Portability : portable
+--
+-- Basic UTF-32 validation.
+module Data.Text.Encoding.Utf32
+    (
+      validate
+    ) where
+
+import Data.Word (Word32)
+
+validate    :: Word32 -> Bool
+validate x1 = (x1 >= 0x0 && x1 < 0xD800) || (x1 > 0xDFFF && x1 <= 0x10FFFF)
+{-# INLINE validate #-}

Data/Text/Encoding/Utf8.hs

+{-# LANGUAGE MagicHash #-}
+
+-- |
+-- Module      : Data.Text.Encoding.Utf8
+-- Copyright   : (c) Tom Harper 2008-2009,
+--               (c) Bryan O'Sullivan 2009,
+--               (c) Duncan Coutts 2009
+--
+-- License     : BSD-style
+-- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
+--               duncan@haskell.org
+-- Stability   : experimental
+-- Portability : GHC
+--
+-- Basic UTF-8 validation and character manipulation.
+module Data.Text.Encoding.Utf8
+    (
+    -- * Decomposition
+      ord2
+    , ord3
+    , ord4
+    -- * Construction
+    , chr2
+    , chr3
+    , chr4
+    -- * Validation
+    , validate1
+    , validate2
+    , validate3
+    , validate4
+    ) where
+
+import Control.Exception (assert)
+import Data.Char (ord)
+import Data.Bits (shiftR, (.&.))
+import GHC.Exts
+import GHC.Word (Word8(..))
+
+default(Int)
+
+between :: Word8                -- ^ byte to check
+        -> Word8                -- ^ lower bound
+        -> Word8                -- ^ upper bound
+        -> Bool
+between x y z = x >= y && x <= z
+{-# INLINE between #-}
+
+ord2   :: Char -> (Word8,Word8)
+ord2 c = assert (n >= 0x80 && n <= 0x07ff) (x1,x2)
+    where
+      n  = ord c
+      x1 = fromIntegral $ (n `shiftR` 6) + 0xC0
+      x2 = fromIntegral $ (n .&. 0x3F)   + 0x80
+
+ord3   :: Char -> (Word8,Word8,Word8)
+ord3 c = assert (n >= 0x0800 && n <= 0xffff) (x1,x2,x3)
+    where
+      n  = ord c
+      x1 = fromIntegral $ (n `shiftR` 12) + 0xE0
+      x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
+      x3 = fromIntegral $ (n .&. 0x3F) + 0x80
+
+ord4   :: Char -> (Word8,Word8,Word8,Word8)
+ord4 c = assert (n >= 0x10000) (x1,x2,x3,x4)
+    where
+      n  = ord c
+      x1 = fromIntegral $ (n `shiftR` 18) + 0xF0
+      x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80
+      x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
+      x4 = fromIntegral $ (n .&. 0x3F) + 0x80
+
+chr2       :: Word8 -> Word8 -> Char
+chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#))
+    where
+      y1# = word2Int# x1#
+      y2# = word2Int# x2#
+      z1# = uncheckedIShiftL# (y1# -# 0xC0#) 6#
+      z2# = y2# -# 0x80#
+{-# INLINE chr2 #-}
+
+chr3          :: Word8 -> Word8 -> Word8 -> Char
+chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#))
+    where
+      y1# = word2Int# x1#
+      y2# = word2Int# x2#
+      y3# = word2Int# x3#
+      z1# = uncheckedIShiftL# (y1# -# 0xE0#) 12#
+      z2# = uncheckedIShiftL# (y2# -# 0x80#) 6#
+      z3# = y3# -# 0x80#
+{-# INLINE chr3 #-}
+
+chr4             :: Word8 -> Word8 -> Word8 -> Word8 -> Char
+chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
+    C# (chr# (z1# +# z2# +# z3# +# z4#))
+    where
+      y1# = word2Int# x1#
+      y2# = word2Int# x2#
+      y3# = word2Int# x3#
+      y4# = word2Int# x4#
+      z1# = uncheckedIShiftL# (y1# -# 0xF0#) 18#
+      z2# = uncheckedIShiftL# (y2# -# 0x80#) 12#
+      z3# = uncheckedIShiftL# (y3# -# 0x80#) 6#
+      z4# = y4# -# 0x80#
+{-# INLINE chr4 #-}
+
+validate1    :: Word8 -> Bool
+validate1 x1 = between x1 0x00 0x7F
+{-# INLINE validate1 #-}
+
+validate2       :: Word8 -> Word8 -> Bool
+validate2 x1 x2 = between x1 0xC2 0xDF && between x2 0x80 0xBF
+{-# INLINE validate2 #-}
+
+validate3          :: Word8 -> Word8 -> Word8 -> Bool
+{-# INLINE validate3 #-}
+validate3 x1 x2 x3 = validate3_1 ||
+                     validate3_2 ||
+                     validate3_3 ||
+                     validate3_4
+  where
+    validate3_1 = (x1 == 0xE0) &&
+                  between x2 0xA0 0xBF &&
+                  between x3 0x80 0xBF
+    validate3_2 = between x1 0xE1 0xEC &&
+                  between x2 0x80 0xBF &&
+                  between x3 0x80 0xBF
+    validate3_3 = x1 == 0xED &&
+                  between x2 0x80 0x9F &&
+                  between x3 0x80 0xBF
+    validate3_4 = between x1 0xEE 0xEF &&
+                  between x2 0x80 0xBF &&
+                  between x3 0x80 0xBF
+
+validate4             :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
+{-# INLINE validate4 #-}
+validate4 x1 x2 x3 x4 = validate4_1 ||
+                        validate4_2 ||
+                        validate4_3
+  where 
+    validate4_1 = x1 == 0xF0 &&
+                  between x2 0x90 0xBF &&
+                  between x3 0x80 0xBF &&
+                  between x4 0x80 0xBF
+    validate4_2 = between x1 0xF1 0xF3 &&
+                  between x2 0x80 0xBF &&
+                  between x3 0x80 0xBF &&
+                  between x4 0x80 0xBF
+    validate4_3 = x1 == 0xF4 &&
+                  between x2 0x80 0x8F &&
+                  between x3 0x80 0xBF &&
+                  between x4 0x80 0xBF

Data/Text/Fusion.hs

 import Data.Text.UnsafeChar (unsafeChr, unsafeWrite, unsafeWriteRev)
 import qualified Data.Text.Array as A
 import qualified Data.Text.Internal as I
-import qualified Data.Text.Utf16 as U16
+import qualified Data.Text.Encoding.Utf16 as U16
 import qualified Prelude as P
 
 default(Int)

Data/Text/Unsafe.hs

 import Control.Exception (assert)
 import Data.Text.Internal (Text(..))
 import Data.Text.UnsafeChar (unsafeChr)
-import Data.Text.Utf16 (chr2)
+import Data.Text.Encoding.Utf16 (chr2)
 import qualified Data.Text.Array as A
 
 -- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'

Data/Text/Utf16.hs

-{-# LANGUAGE MagicHash #-}
-
--- |
--- Module      : Data.Text.Utf16
--- Copyright   : (c) Tom Harper 2008-2009,
---               (c) Bryan O'Sullivan 2009,
---               (c) Duncan Coutts 2009
---
--- License     : BSD-style
--- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
---               duncan@haskell.org
--- Stability   : experimental
--- Portability : GHC
---
--- Basic UTF-16 validation and character manipulation.
-module Data.Text.Utf16
-    (
-      chr2
-    , validate1
-    , validate2
-    ) where
-
-import GHC.Exts
-import GHC.Word (Word16(..))
-
-chr2 :: Word16 -> Word16 -> Char
-chr2 (W16# a#) (W16# b#) = C# (chr# (upper# +# lower# +# 0x10000#))
-    where
-      x# = word2Int# a#
-      y# = word2Int# b#
-      upper# = uncheckedIShiftL# (x# -# 0xD800#) 10#
-      lower# = y# -# 0xDC00#
-{-# INLINE chr2 #-}
-
-validate1    :: Word16 -> Bool
-validate1 x1 = (x1 >= 0 && x1 < 0xD800) || x1 > 0xDFFF
-{-# INLINE validate1 #-}
-
-validate2       ::  Word16 -> Word16 -> Bool
-validate2 x1 x2 = x1 >= 0xD800 && x1 <= 0xDBFF &&
-                  x2 >= 0xDC00 && x2 <= 0xDFFF
-{-# INLINE validate2 #-}

Data/Text/Utf32.hs

--- |
--- Module      : Data.Text.Utf16
--- Copyright   : (c) Tom Harper 2008-2009,
---               (c) Bryan O'Sullivan 2009,
---               (c) Duncan Coutts 2009
---
--- License     : BSD-style
--- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
---               duncan@haskell.org
--- Stability   : experimental
--- Portability : portable
---
--- Basic UTF-32 validation.
-module Data.Text.Utf32
-    (
-      validate
-    ) where
-
-import Data.Word (Word32)
-
-validate    :: Word32 -> Bool
-validate x1 = (x1 >= 0x0 && x1 < 0xD800) || (x1 > 0xDFFF && x1 <= 0x10FFFF)
-{-# INLINE validate #-}

Data/Text/Utf8.hs

-{-# LANGUAGE MagicHash #-}
-
--- |
--- Module      : Data.Text.Utf16
--- Copyright   : (c) Tom Harper 2008-2009,
---               (c) Bryan O'Sullivan 2009,
---               (c) Duncan Coutts 2009
---
--- License     : BSD-style
--- Maintainer  : rtharper@aftereternity.co.uk, bos@serpentine.com,
---               duncan@haskell.org
--- Stability   : experimental
--- Portability : GHC
---
--- Basic UTF-8 validation and character manipulation.
-module Data.Text.Utf8
-    (
-    -- Decomposition
-      ord2
-    , ord3
-    , ord4
-    -- Construction
-    , chr2
-    , chr3
-    , chr4
-    -- * Validation
-    , validate1
-    , validate2
-    , validate3
-    , validate4
-    ) where
-
-import Control.Exception (assert)
-import Data.Char (ord)
-import Data.Bits (shiftR, (.&.))
-import GHC.Exts
-import GHC.Word (Word8(..))
-
-default(Int)
-
-between :: Word8                -- ^ byte to check
-        -> Word8                -- ^ lower bound
-        -> Word8                -- ^ upper bound
-        -> Bool
-between x y z = x >= y && x <= z
-{-# INLINE between #-}
-
-ord2   :: Char -> (Word8,Word8)
-ord2 c = assert (n >= 0x80 && n <= 0x07ff) (x1,x2)
-    where
-      n  = ord c
-      x1 = fromIntegral $ (n `shiftR` 6) + 0xC0
-      x2 = fromIntegral $ (n .&. 0x3F)   + 0x80
-
-ord3   :: Char -> (Word8,Word8,Word8)
-ord3 c = assert (n >= 0x0800 && n <= 0xffff) (x1,x2,x3)
-    where
-      n  = ord c
-      x1 = fromIntegral $ (n `shiftR` 12) + 0xE0
-      x2 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
-      x3 = fromIntegral $ (n .&. 0x3F) + 0x80
-
-ord4   :: Char -> (Word8,Word8,Word8,Word8)
-ord4 c = assert (n >= 0x10000) (x1,x2,x3,x4)
-    where
-      n  = ord c
-      x1 = fromIntegral $ (n `shiftR` 18) + 0xF0
-      x2 = fromIntegral $ ((n `shiftR` 12) .&. 0x3F) + 0x80
-      x3 = fromIntegral $ ((n `shiftR` 6) .&. 0x3F) + 0x80
-      x4 = fromIntegral $ (n .&. 0x3F) + 0x80
-
-chr2       :: Word8 -> Word8 -> Char
-chr2 (W8# x1#) (W8# x2#) = C# (chr# (z1# +# z2#))
-    where
-      y1# = word2Int# x1#
-      y2# = word2Int# x2#
-      z1# = uncheckedIShiftL# (y1# -# 0xC0#) 6#
-      z2# = y2# -# 0x80#
-{-# INLINE chr2 #-}
-
-chr3          :: Word8 -> Word8 -> Word8 -> Char
-chr3 (W8# x1#) (W8# x2#) (W8# x3#) = C# (chr# (z1# +# z2# +# z3#))
-    where
-      y1# = word2Int# x1#
-      y2# = word2Int# x2#
-      y3# = word2Int# x3#
-      z1# = uncheckedIShiftL# (y1# -# 0xE0#) 12#
-      z2# = uncheckedIShiftL# (y2# -# 0x80#) 6#
-      z3# = y3# -# 0x80#
-{-# INLINE chr3 #-}
-
-chr4             :: Word8 -> Word8 -> Word8 -> Word8 -> Char
-chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =
-    C# (chr# (z1# +# z2# +# z3# +# z4#))
-    where
-      y1# = word2Int# x1#
-      y2# = word2Int# x2#
-      y3# = word2Int# x3#
-      y4# = word2Int# x4#
-      z1# = uncheckedIShiftL# (y1# -# 0xF0#) 18#
-      z2# = uncheckedIShiftL# (y2# -# 0x80#) 12#
-      z3# = uncheckedIShiftL# (y3# -# 0x80#) 6#
-      z4# = y4# -# 0x80#
-{-# INLINE chr4 #-}
-
-validate1    :: Word8 -> Bool
-validate1 x1 = between x1 0x00 0x7F
-{-# INLINE validate1 #-}
-
-validate2       :: Word8 -> Word8 -> Bool
-validate2 x1 x2 = between x1 0xC2 0xDF && between x2 0x80 0xBF
-{-# INLINE validate2 #-}
-
-validate3          :: Word8 -> Word8 -> Word8 -> Bool
-{-# INLINE validate3 #-}
-validate3 x1 x2 x3 = validate3_1 ||
-                     validate3_2 ||
-                     validate3_3 ||
-                     validate3_4
-  where
-    validate3_1 = (x1 == 0xE0) &&
-                  between x2 0xA0 0xBF &&
-                  between x3 0x80 0xBF
-    validate3_2 = between x1 0xE1 0xEC &&
-                  between x2 0x80 0xBF &&
-                  between x3 0x80 0xBF
-    validate3_3 = x1 == 0xED &&
-                  between x2 0x80 0x9F &&
-                  between x3 0x80 0xBF
-    validate3_4 = between x1 0xEE 0xEF &&
-                  between x2 0x80 0xBF &&
-                  between x3 0x80 0xBF
-
-validate4             :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
-{-# INLINE validate4 #-}
-validate4 x1 x2 x3 x4 = validate4_1 ||
-                        validate4_2 ||
-                        validate4_3
-  where 
-    validate4_1 = x1 == 0xF0 &&
-                  between x2 0x90 0xBF &&
-                  between x3 0x80 0xBF &&
-                  between x4 0x80 0xBF
-    validate4_2 = between x1 0xF1 0xF3 &&
-                  between x2 0x80 0xBF &&
-                  between x3 0x80 0xBF &&
-                  between x4 0x80 0xBF
-    validate4_3 = x1 == 0xF4 &&
-                  between x2 0x80 0x8F &&
-                  between x3 0x80 0xBF &&
-                  between x4 0x80 0xBF
     Data.Text.Internal
     Data.Text.Unsafe
     Data.Text.UnsafeChar
-    Data.Text.Utf8
-    Data.Text.Utf32
-    Data.Text.Utf16
+    Data.Text.Encoding.Utf8
+    Data.Text.Encoding.Utf16
+    Data.Text.Encoding.Utf32
 
   build-depends:
     base       < 5,
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.