Source

text / Data / Text / Encoding.hs

The default branch has multiple heads

Diff from to

Data/Text/Encoding.hs

+{-# LANGUAGE BangPatterns #-}
 -- |
 -- Module      : Data.Text.Encoding
 -- Copyright   : (c) Tom Harper 2008-2009,
 
     -- * Encoding Text to ByteStrings
     , encodeUtf8
+    , encodeUtf8'
     , encodeUtf16LE
     , encodeUtf16BE
     , encodeUtf32LE
     , encodeUtf32BE
     ) where
     
-import Data.ByteString (ByteString)
+import Data.Bits ((.&.))
+import Data.ByteString as B
+import Data.ByteString.Internal as B
+import Data.Text.Encoding.Error (OnDecodeError, strictDecode)
+import Data.Text.Encoding.Utf16 (chr2)
+import Data.Text.Internal (Text(..))
+import Data.Text.UnsafeChar (ord)
+import Data.Text.UnsafeShift (shiftL, shiftR)
+import Data.Word (Word8)
+import Foreign.ForeignPtr (withForeignPtr)
+import Foreign.Ptr (plusPtr)
+import Foreign.Storable (poke)
+import System.IO.Unsafe (unsafePerformIO)
+import qualified Data.Text.Array as A
+import qualified Data.Text.Encoding.Fusion as E
 import qualified Data.Text.Fusion as F
-import Data.Text.Encoding.Error (OnDecodeError, strictDecode)
-import qualified Data.Text.Encoding.Fusion as E
-import Data.Text.Internal (Text)
 
 -- | Decode a 'ByteString' containing 7-bit ASCII encoded text.
 decodeASCII :: ByteString -> Text
 {-# INLINE decodeUtf8 #-}
 
 -- | Encode text using UTF-8 encoding.
+encodeUtf8' :: Text -> ByteString
+encodeUtf8' txt = E.unstream (E.restreamUtf8 (F.stream txt))
+{-# INLINE encodeUtf8' #-}
+
+-- | Encode text using UTF-8 encoding.
 encodeUtf8 :: Text -> ByteString
-encodeUtf8 txt = E.unstream (E.restreamUtf8 (F.stream txt))
-{-# INLINE encodeUtf8 #-}
+encodeUtf8 (Text arr off len) = unsafePerformIO $ do
+  let size0 = min len 4
+  mallocByteString size0 >>= start size0 off 0
+ where
+  start size n0 m0 fp = withForeignPtr fp $ loop n0 m0
+   where
+    loop n1 m1 ptr = go n1 m1
+     where
+      go !n !m
+        | n-off == len = return $! PS fp 0 m
+        | size-m < 4 = {-# SCC "encodeUtf8/resize" #-} do
+            let newSize = size `shiftL` 1
+            fp' <- mallocByteString newSize
+            withForeignPtr fp' $ \ptr' -> memcpy ptr' ptr (fromIntegral m)
+            start newSize n m fp'
+        | otherwise = do
+            let poke8 k v = poke (ptr `plusPtr` k) (fromIntegral v :: Word8)
+                w = A.unsafeIndex arr n
+            case undefined of
+             _| w <= 0x7F  -> do
+                  poke8 m w
+                  go (n+1) (m+1)
+              | w <= 0x7FF -> do
+                  poke8 m     $ (w `shiftR` 6) + 0xC0
+                  poke8 (m+1) $ (w .&. 0x3f) + 0x80
+                  go (n+1) (m+2)
+              | 0xD800 <= w && w <= 0xDBFF -> do
+                  let c = ord $ chr2 w (A.unsafeIndex arr (n+1))
+                  poke8 m     $ (c `shiftR` 18) + 0xF0
+                  poke8 (m+1) $ ((c `shiftR` 12) .&. 0x3F) + 0x80
+                  poke8 (m+2) $ ((c `shiftR` 6) .&. 0x3F) + 0x80
+                  poke8 (m+3) $ (c .&. 0x3F) + 0x80
+                  go (n+2) (m+4)
+              | otherwise -> do
+                  poke8 m     $ (w `shiftR` 12) + 0xE0
+                  poke8 (m+1) $ ((w `shiftR` 6) .&. 0x3F) + 0x80
+                  poke8 (m+2) $ (w .&. 0x3F) + 0x80
+                  go (n+1) (m+3)
+{- INLINE encodeUtf8 #-}
 
 -- | Decode text from little endian UTF-16 encoding.
 decodeUtf16LEWith :: OnDecodeError -> ByteString -> Text
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.