Commits

Bryan O'Sullivan committed 56e917d

Roll back unsuccessful speedup attempts

Comments (0)

Files changed (4)

Data/Text/Array.hs

     , toList
     , unsafeFreeze
     , unsafeIndex
-    , unsafeIndexWord8
     , unsafeNew
     , unsafeWrite
-    , unsafeWriteWord8
     ) where
 
 #if defined(ASSERTS)
 import Data.Bits ((.&.))
 import Data.Text.UnsafeShift (shiftL, shiftR)
 import GHC.Base (ByteArray#, MutableByteArray#, Int(..),
-                 indexWord16Array#, indexWord8Array#, indexWordArray#, newByteArray#,
+                 indexWord16Array#, indexWordArray#, newByteArray#,
                  readWord16Array#, readWordArray#, unsafeCoerce#,
-                 writeWord16Array#, writeWord8Array#, writeWordArray#)
+                 writeWord16Array#, writeWordArray#)
 import GHC.ST (ST(..), runST)
-import GHC.Word (Word16(..), Word8(..), Word(..))
+import GHC.Word (Word16(..), Word(..))
 import Prelude hiding (length, read)
 
 -- | Immutable array type.
     case indexWordArray# ba# i# of r# -> (W# r#)
 {-# INLINE unsafeIndexWord #-}
 
--- | Unchecked read of an immutable array.  May return garbage or
--- crash on an out-of-bounds access.
-unsafeIndexWord8 :: Array -> Int -> Word8
-unsafeIndexWord8 (Array len ba#) i@(I# i#) =
-  CHECK_BOUNDS("unsafeIndexWord8",len,i)
-    case indexWord8Array# ba# i# of r# -> (W8# r#)
-{-# INLINE unsafeIndexWord8 #-}
-
 -- | Unchecked read of a mutable array.  May return garbage or
 -- crash on an out-of-bounds access.
 unsafeRead :: MArray s -> Int -> ST s Word16
     s2# -> (# s2#, () #)
 {-# INLINE unsafeWriteWord #-}
 
--- | Unchecked write of a mutable array.  May return garbage or crash
--- on an out-of-bounds access.
-unsafeWriteWord8 :: MArray s -> Int -> Word8 -> ST s ()
-unsafeWriteWord8 (MArray len marr#) i@(I# i#) (W8# e#) = ST $ \s1# ->
-  CHECK_BOUNDS("unsafeWriteWord8",len,i)
-  case writeWord8Array# marr# i# e# s1# of
-    s2# -> (# s2#, () #)
-{-# INLINE unsafeWriteWord8 #-}
-
 -- | Convert an immutable array to a list.
 toList :: Array -> [Word16]
 toList a = loop 0

Data/Text/Encoding/Fusion.hs

 #if defined(ASSERTS)
 import Control.Exception (assert)
 #endif
-import Data.Bits ((.&.), (.|.))
 import Data.ByteString.Internal (ByteString(..), mallocByteString, memcpy)
-import Control.Monad.ST
-import Data.Text.Array
 import Data.Text.Fusion (Step(..), Stream(..))
 import Data.Text.Fusion.Size
 import Data.Text.Encoding.Error
 
 -- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using UTF-8
 -- encoding.
-streamUtf8' :: OnDecodeError -> ByteString -> Stream Char
-streamUtf8' onErr bs = Stream next 0 (maxSize l)
+streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
+streamUtf8 onErr bs = Stream next 0 (maxSize l)
     where
       l = B.length bs
       next i
             idx = B.unsafeIndex bs
 {-# INLINE [0] streamUtf8 #-}
 
-accept, reject :: Word32
-accept = 0
-reject = 12
-
-data S8 = S8 {-# UNPACK #-} !Word32 {-# UNPACK #-} !Int {-# UNPACK #-} !Word32
-
-streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
-streamUtf8 onErr bs = Stream next (S8 accept 0 0) (maxSize l)
-  where
-    l = B.length bs
-    next (S8 state i code)
-      | i >= l = Done
-      | state' == accept = Yield (unsafeChr32 code') (s' accept)
-      | state' /= reject = Skip (s' state')
-      | otherwise        = decodeError "streamUtf8" "UTF-8" onErr
-                           (Just byte) (s' accept)
-      where s' s = S8 s (i+1) code'
-            byte = B.unsafeIndex bs i
-            word = fromIntegral byte
-            peeku :: Int -> Word32
-            peeku n = fromIntegral (unsafeIndexWord8 utf8d n)
-            !kind = peeku (fromIntegral word)
-            !code' | state /= accept = (word .&. 0x3f) .|. (code `shiftL` 6)
-                  | otherwise = (0xff `shiftR` fromIntegral kind) .&. word
-            !state' = peeku (256 + fromIntegral (state + kind))
-
-
 -- | /O(n)/ Convert a 'ByteString' into a 'Stream Char', using little
 -- endian UTF-16 encoding.
 streamUtf16LE :: OnDecodeError -> ByteString -> Stream Char
       Just c  -> Yield c i
     where desc = "Data.Text.Encoding.Fusion." ++ func ++ ": Invalid " ++
                  kind ++ " stream"
-
-utf8d :: Array
-{-# NOINLINE utf8d #-}
-utf8d = runST fill where
-    fill = do
-      ary <- unsafeNew . (`div` 2) . sum . map fst $ xs
-      mapM_ (uncurry (unsafeWriteWord8 ary))
-            (zip [0..] (concatMap (uncurry replicate) xs))
-      unsafeFreeze ary
-    xs = [(128,0),(16,1),(16,9),(32,7),(2,8),(30,2),(1,10),(12,3),(1,4),(2,3),
-          (1,11),(3,6),(1,5),(11,8),(1,0),(1,12),(1,24),(1,36),(1,60),(1,96),
-          (1,84),(3,12),(1,48),(1,72),(13,12),(1,0),(5,12),(1,0),(1,12),(1,0),
-          (3,12),(1,24),(5,12),(1,24),(1,12),(1,24),(9,12),(1,24),(5,12),(1,24),
-          (7,12),(1,24),(9,12),(1,36),(1,12),(1,36),(3,12),(1,36),(5,12),(1,36),
-          (1,12),(1,36),(3,12),(1,36),(10,12)]

Data/Text/Encoding/Fusion/Common.hs

     , restreamUtf16BE
     , restreamUtf32LE
     , restreamUtf32BE
-    , istreamUtf8
     ) where
 
-import Control.Monad.ST
-import Data.Bits ((.&.), (.|.))
-import Data.Text.Array
+import Data.Bits ((.&.))
 import Data.Text.Fusion (Step(..), Stream(..))
 import Data.Text.Fusion.Internal (M(..), S(..))
-import Data.Text.UnsafeChar (ord, unsafeChr32)
-import Data.Text.UnsafeShift (shiftL, shiftR)
-import Data.Word (Word8, Word32)
+import Data.Text.UnsafeChar (ord)
+import Data.Text.UnsafeShift (shiftR)
+import Data.Word (Word8)
 import qualified Data.Text.Encoding.Utf8 as U8
 
-accept, reject :: Word32
-accept = 0
-reject = 12
-
-data S8 s = S8 !s {-# UNPACK #-} !Word32 {-# UNPACK #-} !Word32
-
-istreamUtf8 :: Stream Word8 -> Stream Char
-istreamUtf8 (Stream next0 s0 len) =
-    Stream next (S8 s0 accept 0) len
-  where
-    next (S8 s state code) =
-      case next0 s of
-        Done -> Done
-        Skip s' -> Skip (S8 s' state code)
-        Yield w s'
-          | state' == accept -> Yield (unsafeChr32 code') (S8 s' accept code')
-          | state' /= reject -> Skip (S8 s' state' code')
-            where
-              word = fromIntegral w
-              peeku :: Int -> Word32
-              peeku n = fromIntegral (unsafeIndexWord8 utf8d n)
-              !kind = peeku (fromIntegral word)
-              !code' | state /= accept = (word .&. 0x3f) .|. (code `shiftL` 6)
-                     | otherwise = (0xff `shiftR` fromIntegral kind) .&. word
-              !state' = peeku (256 + fromIntegral (state + kind))
-               
-                                                           
 -- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
 restreamUtf8 :: Stream Char -> Stream Word8
 restreamUtf8 (Stream next0 s0 len) =
 internalError :: String -> a
 internalError func =
     error $ "Data.Text.Encoding.Fusion.Common." ++ func ++ ": internal error"
-
-utf8d :: Array
-{-# NOINLINE utf8d #-}
-utf8d = runST fill where
-    fill = do
-      ary <- unsafeNew . (`div` 2) . sum . map fst $ xs
-      mapM_ (uncurry (unsafeWriteWord8 ary))
-            (zip [0..] (concatMap (uncurry replicate) xs))
-      unsafeFreeze ary
-    xs = [(128,0),(16,1),(16,9),(32,7),(2,8),(30,2),(1,10),(12,3),(1,4),(2,3),
-          (1,11),(3,6),(1,5),(11,8),(1,0),(1,12),(1,24),(1,36),(1,60),(1,96),
-          (1,84),(3,12),(1,48),(1,72),(13,12),(1,0),(5,12),(1,0),(1,12),(1,0),
-          (3,12),(1,24),(5,12),(1,24),(1,12),(1,24),(9,12),(1,24),(5,12),(1,24),
-          (7,12),(1,24),(9,12),(1,36),(1,12),(1,36),(3,12),(1,36),(5,12),(1,36),
-          (1,12),(1,36),(3,12),(1,36),(10,12)]

Data/Text/Lazy/Encoding/Fusion.hs

        | S3 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
        | S4 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
 
-data SS = SS ByteString {-# UNPACK #-} !Int
-
-streamBytes :: ByteString -> Stream Word8
-streamBytes bs0 = Stream next (SS bs0 0) unknownSize
-  where
-    next (SS bs@(Chunk s ss) i)
-        | i < len = Yield (B.unsafeIndex s i) (SS bs (i+1))
-        | otherwise = next (SS ss 0)
-        where len = B.length s
-    next _ = Done
-
 data T = T {-# UNPACK #-} !ByteString {-# UNPACK #-} !S {-# UNPACK #-} !Int
 
 -- | /O(n)/ Convert a lazy 'ByteString' into a 'Stream Char', using
 -- UTF-8 encoding.
 streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
-streamUtf8 onErr = istreamUtf8 . streamBytes
-
-streamUtf8' :: OnDecodeError -> ByteString -> Stream Char
-streamUtf8' onErr bs0 = Stream next (T bs0 S0 0) unknownSize
+streamUtf8 onErr bs0 = Stream next (T bs0 S0 0) unknownSize
   where
     next (T bs@(Chunk ps _) S0 i)
       | i < len && U8.validate1 a =