Commits

Bryan O'Sullivan committed c9831b5

Drop the old pure-Haskell implementation of encodeUtf8

Comments (0)

Files changed (2)

Data/Text/Encoding.hs

 
     -- * Encoding Text to ByteStrings
     , encodeUtf8
-    , encodeUtf8_0
-    , encodeUtf8_2
     , encodeUtf16LE
     , encodeUtf16BE
     , encodeUtf32LE
 #endif
 
 import Control.Monad.ST (runST)
-import Data.Bits ((.&.))
 import Data.ByteString as B
 import Data.ByteString.Internal as B hiding (c2w)
 
 #if MIN_VERSION_bytestring(0,10,4)
+import Data.Bits ((.&.))
+import Data.Text.Internal.Unsafe.Char (ord)
 import qualified Data.ByteString.Builder as B
-import qualified Data.ByteString.Builder.Extra    as B
+import qualified Data.ByteString.Builder.Extra as B
 import qualified Data.ByteString.Builder.Internal as B hiding (empty)
 import qualified Data.ByteString.Builder.Prim as BP
 import qualified Data.ByteString.Builder.Prim.Internal as BP
 import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text.Internal.Encoding.Utf16 as U16
 #endif
 
 import Data.Text ()
 import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
 import Data.Text.Internal (Text(..), safe, textP)
-import Data.Text.Internal.Unsafe.Shift (shiftL, shiftR)
+import Data.Text.Internal.Unsafe.Shift (shiftR)
 import Data.Text.Internal.Private (runText)
-import Data.Text.Internal.Unsafe.Char (ord, unsafeWrite)
+import Data.Text.Internal.Unsafe.Char (unsafeWrite)
 import Data.Word (Word8, Word32)
 import Foreign.C.Types (CSize(..))
 import Foreign.ForeignPtr (withForeignPtr)
 import GHC.Base (ByteArray#, MutableByteArray#)
 import qualified Data.Text.Array as A
 import qualified Data.Text.Internal.Encoding.Fusion as E
-import qualified Data.Text.Internal.Encoding.Utf16 as U16
 import qualified Data.Text.Internal.Fusion as F
 import Data.Text.Unsafe (unsafeDupablePerformIO)
 
 
 -- | Encode text using UTF-8 encoding.
 encodeUtf8 :: Text -> ByteString
-encodeUtf8 = encodeUtf8_0
-
-encodeUtf8_0 :: Text -> ByteString
-encodeUtf8_0 (Text arr off len) = unsafeDupablePerformIO $ do
-  let size0 = max len 4
-  mallocByteString size0 >>= start size0 off 0
- where
-  start size n0 m0 fp = withForeignPtr fp $ loop n0 m0
-   where
-    loop n1 m1 ptr = go n1 m1
-     where
-      offLen = off + len
-      go !n !m
-        | n == offLen = return (PS fp 0 m)
-        | otherwise = do
-            let poke8 k v = poke (ptr `plusPtr` k) (fromIntegral v :: Word8)
-                ensure k act
-                  | size-m >= k = act
-                  | otherwise = {-# SCC "resizeUtf8/ensure" #-} do
-                      let newSize = size `shiftL` 1
-                      fp' <- mallocByteString newSize
-                      withForeignPtr fp' $ \ptr' ->
-                        memcpy ptr' ptr (fromIntegral m)
-                      start newSize n m fp'
-                {-# INLINE ensure #-}
-            case A.unsafeIndex arr n of
-             w| w <= 0x7F  -> ensure 1 $ do
-                  poke (ptr `plusPtr` m) (fromIntegral w :: Word8)
-                  go (n+1) (m+1)
-              | w <= 0x7FF -> ensure 2 $ do
-                  poke8 m     $ (w `shiftR` 6) + 0xC0
-                  poke8 (m+1) $ (w .&. 0x3f) + 0x80
-                  go (n+1) (m+2)
-              | 0xD800 <= w && w <= 0xDBFF -> ensure 4 $ do
-                  let c = ord $ U16.chr2 w (A.unsafeIndex arr (n+1))
-                  poke8 m     $ (c `shiftR` 18) + 0xF0
-                  poke8 (m+1) $ ((c `shiftR` 12) .&. 0x3F) + 0x80
-                  poke8 (m+2) $ ((c `shiftR` 6) .&. 0x3F) + 0x80
-                  poke8 (m+3) $ (c .&. 0x3F) + 0x80
-                  go (n+2) (m+4)
-              | otherwise -> ensure 3 $ do
-                  poke8 m     $ (w `shiftR` 12) + 0xE0
-                  poke8 (m+1) $ ((w `shiftR` 6) .&. 0x3F) + 0x80
-                  poke8 (m+2) $ (w .&. 0x3F) + 0x80
-                  go (n+1) (m+3)
-
-encodeUtf8_2 :: Text -> ByteString
-encodeUtf8_2 (Text arr off len)
+encodeUtf8 (Text arr off len)
   | len == 0  = B.empty
   | otherwise = unsafeDupablePerformIO $ do
   fp <- mallocByteString (len*4)

benchmarks/haskell/Benchmarks/Pure.hs

             ]
         , bgroup "encode"
             [ benchT   $ nf T.encodeUtf8 ta
-            , benchT2  $ nf T.encodeUtf8_2 ta
             , benchTL  $ nf TL.encodeUtf8 tla
             , benchBS  $ nf BS.pack sa
             , benchBSL $ nf BL.pack sa
   where
     benchS   = bench ("String+" ++ kind)
     benchT   = bench ("Text+" ++ kind)
-    benchT2  = bench ("Text2+" ++ kind)
     benchTL  = bench ("LazyText+" ++ kind)
     benchBS  = bench ("ByteString+" ++ kind)
     benchBSL = bench ("LazyByteString+" ++ kind)