Commits

Bryan O'Sullivan committed 44b54f4

Reduce the amount of code generated for string literals

Previously, every Text literal generated a big wad of inefficient
code:

{-# LANGUAGE OverloadedStrings #-}
foo :: Text
foo = "foo"

This would first convert to a String, then to a Text. To make
matters worse, the code for conversion from String to Text was
inlined at every site where a string literal occurred (expected,
but undesired, behaviour).

In this change, we introduce a direct conversion from Addr# to each
of the Text types, and we ensure that uses of these never result in
excessive code generation.

  • Participants
  • Parent commits 7082e94

Comments (0)

Files changed (3)

-{-# LANGUAGE BangPatterns, CPP, Rank2Types, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 
 -- |
 -- Module      : Data.Text
--- Copyright   : (c) 2009, 2010, 2011 Bryan O'Sullivan,
+-- Copyright   : (c) 2009, 2010, 2011, 2012 Bryan O'Sullivan,
 --               (c) 2009 Duncan Coutts,
 --               (c) 2008, 2009 Tom Harper
 --
 import qualified Data.Text.Lazy as L
 import Data.Int (Int64)
 #endif
+import qualified GHC.CString as GHC
+import GHC.Prim (Addr#)
 
 -- $strict
 --
 unpack = S.unstreamList . stream
 {-# INLINE [1] unpack #-}
 
+-- | /O(n)/ Convert a literal string into a Text.
+unpackCString# :: Addr# -> Text
+unpackCString# addr# = unstream (S.streamCString# addr#)
+{-# NOINLINE unpackCString# #-}
+
+{-# RULES "TEXT literal" forall a.
+    unstream (S.streamList (L.map safe (GHC.unpackCString# a)))
+      = unpackCString# a #-}
+
+{-# RULES "TEXT literal UTF8" forall a.
+    unstream (S.streamList (L.map safe (GHC.unpackCStringUtf8# a)))
+      = unpackCString# a #-}
+
 -- | /O(1)/ Convert a character into a Text.  Subject to fusion.
 -- Performs replacement on invalid scalar values.
 singleton :: Char -> Text

Data/Text/Fusion/Common.hs

-{-# LANGUAGE BangPatterns, Rank2Types #-}
+{-# LANGUAGE BangPatterns, MagicHash, Rank2Types #-}
 -- |
 -- Module      : Data.Text.Fusion.Common
--- Copyright   : (c) Bryan O'Sullivan 2009
+-- Copyright   : (c) Bryan O'Sullivan 2009, 2012
 --
 -- License     : BSD-style
 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
       singleton
     , streamList
     , unstreamList
+    , streamCString#
 
     -- * Basic interface
     , cons
                 (&&), fromIntegral, otherwise)
 import qualified Data.List as L
 import qualified Prelude as P
+import Data.Bits (shiftL)
 import Data.Int (Int64)
 import Data.Text.Fusion.Internal
 import Data.Text.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping)
 import Data.Text.Fusion.Size
+import GHC.Prim (Addr#, chr#, indexCharOffAddr#, ord#)
+import GHC.Types (Char(..), Int(..))
 
 singleton :: Char -> Stream Char
 singleton c = Stream next False 1
 
 {-# RULES "STREAM streamList/unstreamList fusion" forall s. streamList (unstreamList s) = s #-}
 
+-- | Stream the UTF-8-like packed encoding used by GHC to represent
+-- constant strings in generated code.
+--
+-- This encoding uses the byte sequence "\xc0\x80" to represent NUL,
+-- and the string is NUL-terminated.
+streamCString# :: Addr# -> Stream Char
+streamCString# addr = Stream step 0 unknownSize
+  where
+    step !i
+        | b == 0    = Done
+        | b <= 0x7f = Yield (C# b#) (i+1)
+        | b <= 0xdf = let !c = chr $ ((b-0xc0) `shiftL` 6) + next 1
+                      in Yield c (i+2)
+        | b <= 0xef = let !c = chr $ ((b-0xe0) `shiftL` 12) +
+                                      (next 1  `shiftL` 6) +
+                                       next 2
+                      in Yield c (i+3)
+        | otherwise = let !c = chr $ ((b-0xf0) `shiftL` 18) +
+                                      (next 1  `shiftL` 12) +
+                                      (next 2  `shiftL` 6) +
+                                       next 3
+                      in Yield c (i+4)
+      where b      = I# (ord# b#)
+            next n = I# (ord# (at# (i+n))) - 0x80
+            b#     = at# i
+    at# (I# i#) = indexCharOffAddr# addr i#
+    chr (I# i#) = C# (chr# i#)
+{-# INLINE [0] streamCString# #-}
+
 -- ----------------------------------------------------------------------------
 -- * Basic stream functions
 

Data/Text/Lazy.hs

 {-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE BangPatterns, CPP #-}
+{-# LANGUAGE BangPatterns, MagicHash, CPP #-}
 -- |
 -- Module      : Data.Text.Lazy
--- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
+-- Copyright   : (c) 2009, 2010, 2012 Bryan O'Sullivan
 --
 -- License     : BSD-style
 -- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
 import Data.Text.Internal (firstf, safe, textP)
 import qualified Data.Text.Util as U
 import Data.Text.Lazy.Search (indices)
+import qualified GHC.CString as GHC
+import GHC.Prim (Addr#)
 
 -- $fusion
 --
 unpack t = S.unstreamList (stream t)
 {-# INLINE [1] unpack #-}
 
+-- | /O(n)/ Convert a literal string into a Text.
+unpackCString# :: Addr# -> Text
+unpackCString# addr# = unstream (S.streamCString# addr#)
+{-# NOINLINE unpackCString# #-}
+
+{-# RULES "TEXT literal" forall a.
+    unstream (S.streamList (L.map safe (GHC.unpackCString# a)))
+      = unpackCString# a #-}
+
+{-# RULES "TEXT literal UTF8" forall a.
+    unstream (S.streamList (L.map safe (GHC.unpackCStringUtf8# a)))
+      = unpackCString# a #-}
+
 -- | /O(1)/ Convert a character into a Text.  Subject to fusion.
 -- Performs replacement on invalid scalar values.
 singleton :: Char -> Text