text / Data / Text / Internal.hs

The default branch has multiple heads

Bryan O'Sullivan c6bb1bb 
Bryan O'Sullivan a0b7ed3 
Bryan O'Sullivan 0e79e48 
Bryan O'Sullivan 9adb5ed 
Bryan O'Sullivan 9071533 


Bryan O'Sullivan 0e79e48 

Bryan O'Sullivan 7dfeb13 
Bryan O'Sullivan 0e79e48 



Bryan O'Sullivan 36b2a3a 






Bryan O'Sullivan 0e79e48 
Bryan O'Sullivan ee471a1 

Bryan O'Sullivan 0e79e48 
Bryan O'Sullivan dd063aa 
Bryan O'Sullivan ee471a1 
Bryan O'Sullivan dcc1207 

Bryan O'Sullivan 9f6ffc1 
Bryan O'Sullivan e8797ca 

Bryan O'Sullivan 0e79e48 
Bryan O'Sullivan ee471a1 
Bryan O'Sullivan e8797ca 

Bryan O'Sullivan dcc1207 

Bryan O'Sullivan ee471a1 
Duncan Coutts 8503dd9 
Bryan O'Sullivan c6bb1bb 
Bryan O'Sullivan dcc1207 
Bryan O'Sullivan c6bb1bb 
Bryan O'Sullivan e8797ca 
Bryan O'Sullivan 6f79fce 
Bryan O'Sullivan e8797ca 
Bryan O'Sullivan a0b7ed3 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 0e79e48 
Bryan O'Sullivan de2dd15 
Bryan O'Sullivan dd063aa 


Bryan O'Sullivan de2dd15 
Bryan O'Sullivan dcc1207 
Bryan O'Sullivan 2f4bcde 
Bryan O'Sullivan 6f2e782 
Bryan O'Sullivan dcc1207 
Bryan O'Sullivan c6bb1bb 








Bryan O'Sullivan dcc1207 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 0e79e48 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 6f79fce 
Duncan Coutts 8503dd9 
Bryan O'Sullivan dcc1207 
Bryan O'Sullivan 9f6ffc1 

Bryan O'Sullivan 6f2e782 
Bryan O'Sullivan 9f6ffc1 



Bryan O'Sullivan 2f4bcde 
Bryan O'Sullivan dcc1207 

Bryan O'Sullivan e2bf37d 
Bryan O'Sullivan dcc1207 
Bryan O'Sullivan e8797ca 






Bryan O'Sullivan dd063aa 

Bryan O'Sullivan e8797ca 









Bryan O'Sullivan dd063aa 

















{-# LANGUAGE CPP, DeriveDataTypeable #-}

-- |
-- Module      : Data.Text.Internal
-- Copyright   : (c) 2008, 2009 Tom Harper,
--               (c) 2009, 2010 Bryan O'Sullivan,
--               (c) 2009 Duncan Coutts
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : GHC
--
-- A module containing private 'Text' internals. This exposes the
-- 'Text' representation and low level construction functions.
-- Modules which extend the 'Text' system may need to use this module.
--
-- You should not use this module unless you are determined to monkey
-- with the internals, as the functions here do just about nothing to
-- preserve data invariants.  You have been warned!

module Data.Text.Internal
    (
    -- * Types
    -- $internals
      Text(..)
    -- * Construction
    , text
    , textP
    -- * Safety
    , safe
    -- * Code that must be here for accessibility
    , empty
    -- * Utilities
    , firstf
    -- * Debugging
    , showText
    ) where

#if defined(ASSERTS)
import Control.Exception (assert)
#endif
import Data.Bits ((.&.))
import qualified Data.Text.Array as A
import Data.Text.UnsafeChar (ord)
import Data.Typeable (Typeable)

-- | A space efficient, packed, unboxed Unicode text type.
data Text = Text
    {-# UNPACK #-} !A.Array          -- payload (Word16 elements)
    {-# UNPACK #-} !Int              -- offset (units of Word16, not Char)
    {-# UNPACK #-} !Int              -- length (units of Word16, not Char)
    deriving (Typeable)

-- | Smart constructor.
text :: A.Array -> Int -> Int -> Text
text arr off len =
#if defined(ASSERTS)
  let c    = A.unsafeIndex arr off
      alen = A.length arr
  in assert (len >= 0) .
     assert (off >= 0) .
     assert (alen == 0 || len == 0 || off < alen) .
     assert (len == 0 || c < 0xDC00 || c > 0xDFFF) $
#endif
     Text arr off len
{-# INLINE text #-}

-- | /O(1)/ The empty 'Text'.
empty :: Text
empty = Text A.empty 0 0
{-# INLINE [1] empty #-}

-- | Construct a 'Text' without invisibly pinning its byte array in
-- memory if its length has dwindled to zero.
textP :: A.Array -> Int -> Int -> Text
textP arr off len | len == 0  = empty
                  | otherwise = text arr off len
{-# INLINE textP #-}

-- | A useful 'show'-like function for debugging purposes.
showText :: Text -> String
showText (Text arr off len) =
    "Text " ++ show (A.toList arr off len) ++ ' ' :
            show off ++ ' ' : show len

-- | Map a 'Char' to a 'Text'-safe value.
--
-- UTF-16 surrogate code points are not included in the set of Unicode
-- scalar values, but are unfortunately admitted as valid 'Char'
-- values by Haskell.  They cannot be represented in a 'Text'.  This
-- function remaps those code points to the Unicode replacement
-- character (U+FFFD, \'&#xfffd;\'), and leaves other code points
-- unchanged.
safe :: Char -> Char
safe c
    | ord c .&. 0x1ff800 /= 0xd800 = c
    | otherwise                    = '\xfffd'
{-# INLINE safe #-}

-- | Apply a function to the first element of an optional pair.
firstf :: (a -> c) -> Maybe (a,b) -> Maybe (c,b)
firstf f (Just (a, b)) = Just (f a, b)
firstf _  Nothing      = Nothing

-- $internals
--
-- Internally, the 'Text' type is represented as an array of 'Word16'
-- UTF-16 code units. The offset and length fields in the constructor
-- are in these units, /not/ units of 'Char'.
--
-- Invariants that all functions must maintain:
--
-- * Since the 'Text' type uses UTF-16 internally, it cannot represent
--   characters in the reserved surrogate code point range U+D800 to
--   U+DFFF. To maintain this invariant, the 'safe' function maps
--   'Char' values in this range to the replacement character (U+FFFD,
--   \'&#xfffd;\').
--
-- * A leading (or \"high\") surrogate code unit (0xD800–0xDBFF) must
--   always be followed by a trailing (or \"low\") surrogate code unit
--   (0xDC00-0xDFFF). A trailing surrogate code unit must always be
--   preceded by a leading surrogate code unit.
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.