Commits

Bryan O'Sullivan committed ff91fb0

Use a more heavily packed state type for lazy UTF-8 decoding.

Comments (0)

Files changed (1)

Data/Text/Lazy/Encoding/Fusion.hs

 import Data.Text.Encoding.Fusion.Common
 import Data.Text.Encoding.Error
 import Data.Text.Fusion (Step(..), Stream(..))
-import Data.Text.Fusion.Internal (M(..), PairS(..), S(..))
+import Data.Text.Fusion.Internal (PairS(..))
 import Data.Text.Fusion.Size
 import Data.Text.UnsafeChar (unsafeChr8)
 import Data.Word (Word8)
 import Control.Exception (assert)
 import qualified Data.ByteString.Internal as B
 
+data S = S0
+       | S1 {-# UNPACK #-} !Word8
+       | S2 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
+       | S3 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
+       | S4 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
+
 -- | /O(n)/ Convert a lazy 'ByteString' into a 'Stream Char', using
 -- UTF-8 encoding.
 streamUtf8 :: OnDecodeError -> ByteString -> Stream Char
-streamUtf8 onErr bs0 = Stream next (bs0 :*: empty :*: 0) unknownSize
+streamUtf8 onErr bs0 = Stream next (bs0 :*: S0 :*: 0) unknownSize
     where
-      empty = S N N N N
-      next (bs@(Chunk ps _) :*: S N _ _ _ :*: i)
+      next (bs@(Chunk ps _) :*: S0 :*: i)
           | i < len && U8.validate1 a =
-              Yield (unsafeChr8 a) (bs :*: empty :*: i+1)
+              Yield (unsafeChr8 a) (bs :*: S0 :*: i+1)
           | i + 1 < len && U8.validate2 a b =
-              Yield (U8.chr2 a b) (bs :*: empty :*: i+2)
+              Yield (U8.chr2 a b) (bs :*: S0 :*: i+2)
           | i + 2 < len && U8.validate3 a b c =
-              Yield (U8.chr3 a b c) (bs :*: empty :*: i+3)
+              Yield (U8.chr3 a b c) (bs :*: S0 :*: i+3)
           | i + 4 < len && U8.validate4 a b c d =
-              Yield (U8.chr4 a b c d) (bs :*: empty :*: i+4)
+              Yield (U8.chr4 a b c d) (bs :*: S0 :*: i+4)
           where len = B.length ps
                 a = B.unsafeIndex ps i
                 b = B.unsafeIndex ps (i+1)
                 d = B.unsafeIndex ps (i+3)
       next st@(bs :*: s :*: i) =
         case s of
-          S (J a) N _ _             | U8.validate1 a ->
-            Yield (unsafeChr8 a) es
-          S (J a) (J b) N _         | U8.validate2 a b ->
-            Yield (U8.chr2 a b) es
-          S (J a) (J b) (J c) N     | U8.validate3 a b c ->
-            Yield (U8.chr3 a b c) es
-          S (J a) (J b) (J c) (J d) | U8.validate4 a b c d ->
-            Yield (U8.chr4 a b c d) es
+          S1 a | U8.validate1 a -> Yield (unsafeChr8 a) es
+          S2 a b | U8.validate2 a b -> Yield (U8.chr2 a b) es
+          S3 a b c | U8.validate3 a b c -> Yield (U8.chr3 a b c) es
+          S4 a b c d | U8.validate4 a b c d -> Yield (U8.chr4 a b c d) es
           _ -> consume st
-         where es = bs :*: empty :*: i
+         where es = bs :*: S0 :*: i
       consume (bs@(Chunk ps rest) :*: s :*: i)
           | i >= B.length ps = consume (rest :*: s  :*: 0)
           | otherwise =
         case s of
-          S N _ _ _ -> next (bs :*: S x N N N :*: i+1)
-          S a N _ _ -> next (bs :*: S a x N N :*: i+1)
-          S a b N _ -> next (bs :*: S a b x N :*: i+1)
-          S a b c N -> next (bs :*: S a b c x :*: i+1)
-          S (J a) b c d -> decodeError "streamUtf8" "UTF-8" onErr (Just a)
-                           (bs :*: S b c d N :*: i+1)
-          where x = J (B.unsafeIndex ps i)
-      consume (Empty :*: S N _ _ _ :*: _) = Done
+          S0 -> next (bs :*: S1 x :*: i+1)
+          S1 a -> next (bs :*: S2 a x :*: i+1)
+          S2 a b -> next (bs :*: S3 a b x :*: i+1)
+          S3 a b c -> next (bs :*: S4 a b c x :*: i+1)
+          S4 a b c d -> decodeError "streamUtf8" "UTF-8" onErr (Just a)
+                           (bs :*: S3 b c d :*: i+1)
+          where x = B.unsafeIndex ps i
+      consume (Empty :*: S0 :*: _) = Done
       consume st = decodeError "streamUtf8" "UTF-8" onErr Nothing st
 {-# INLINE [0] streamUtf8 #-}