text / Data / Text / Foreign.hs

The default branch has multiple heads

Bryan O'Sullivan 194e0cf 
Bryan O'Sullivan f4f76b6 

Bryan O'Sullivan 9071533 
Bryan O'Sullivan f4f76b6 

Bryan O'Sullivan 7dfeb13 
Bryan O'Sullivan f4f76b6 










Bryan O'Sullivan 194e0cf 
Bryan O'Sullivan f4f76b6 
Bryan O'Sullivan 194e0cf 
Bryan O'Sullivan f4f76b6 
Bryan O'Sullivan f6ae04e 
Bryan O'Sullivan f4f76b6 


Bryan O'Sullivan f44aae4 
Bryan O'Sullivan 272a1dc 
Bryan O'Sullivan f44aae4 

Bryan O'Sullivan f4f76b6 

Bryan O'Sullivan c6bb1bb 
Bryan O'Sullivan f4f76b6 
Bryan O'Sullivan c6bb1bb 
Bryan O'Sullivan f4f76b6 

Bryan O'Sullivan 8743c42 
Bryan O'Sullivan f4f76b6 



Bryan O'Sullivan f6ae04e 
Bryan O'Sullivan f4f76b6 














Bryan O'Sullivan 194e0cf 



Bryan O'Sullivan f4f76b6 


Bryan O'Sullivan 194e0cf 
Bryan O'Sullivan f4f76b6 
Bryan O'Sullivan 194e0cf 

Bryan O'Sullivan c6bb1bb 



Bryan O'Sullivan f4f76b6 
Bryan O'Sullivan 61038db 
Bryan O'Sullivan f4f76b6 






Bryan O'Sullivan 272a1dc 






Bryan O'Sullivan f44aae4 





Bryan O'Sullivan 194e0cf 

Bryan O'Sullivan f44aae4 













Bryan O'Sullivan 194e0cf 

Bryan O'Sullivan f44aae4 







Bryan O'Sullivan f4f76b6 












Bryan O'Sullivan 194e0cf 
Bryan O'Sullivan f4f76b6 


Bryan O'Sullivan 194e0cf 
Bryan O'Sullivan f6ae04e 





{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving #-}
-- |
-- Module      : Data.Text.Foreign
-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : GHC
--
-- Support for using 'Text' data with native code via the Haskell
-- foreign function interface.

module Data.Text.Foreign
    (
    -- * Interoperability with native code
    -- $interop
      I16
    -- * Safe conversion functions
    , fromPtr
    , useAsPtr
    , asForeignPtr
    -- * Unsafe conversion code
    , lengthWord16
    , unsafeCopyToPtr
    -- * Low-level manipulation
    -- $lowlevel
    , dropWord16
    , takeWord16
    ) where

#if defined(ASSERTS)
import Control.Exception (assert)
#endif
import Control.Monad.ST (unsafeIOToST)
import Data.Text.Internal (Text(..), empty)
import Data.Text.Unsafe (lengthWord16)
import qualified Data.Text.Array as A
import Data.Word (Word16)
import Foreign.Marshal.Alloc (allocaBytes)
import Foreign.Ptr (Ptr, castPtr, plusPtr)
import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
import Foreign.Storable (peek, poke)

-- $interop
--
-- The 'Text' type is implemented using arrays that are not guaranteed
-- to have a fixed address in the Haskell heap. All communication with
-- native code must thus occur by copying data back and forth.
--
-- The 'Text' type's internal representation is UTF-16, using the
-- platform's native endianness.  This makes copied data suitable for
-- use with native libraries that use a similar representation, such
-- as ICU.  To interoperate with native libraries that use different
-- internal representations, such as UTF-8 or UTF-32, consider using
-- the functions in the 'Data.Text.Encoding' module.

-- | A count of UTF-16 code units ('Word16' values), as opposed to a
-- count of Unicode characters.  It is a thin wrapper around 'Int';
-- all of the numeric instances below are taken from 'Int'.
newtype I16 = I16 Int
    deriving ( Eq, Ord
             , Bounded, Enum
             , Num, Real, Integral
             , Read, Show
             )

-- | /O(n)/ Build a fresh 'Text' by copying 'Word16' code units out of
-- a native array.  The source memory is only read, never retained.
--
-- A length of zero yields 'empty' without touching the pointer.
fromPtr :: Ptr Word16           -- ^ source array
        -> I16                  -- ^ length of source array (in 'Word16' units)
        -> IO Text
fromPtr src (I16 count)
    | count == 0 = return empty
    | otherwise  =
#if defined(ASSERTS)
        assert (count > 0) $
#endif
        return $! Text filled 0 count
  where
    -- Allocate a mutable array of the requested size and populate it
    -- from the source pointer before freezing it.
    filled = A.run (A.new count >>= fill src 0)
    -- Copy one code unit per step; each 'Word16' is two bytes wide,
    -- hence the pointer advances by 2.
    fill !p !i marr
      | i == count = return marr
      | otherwise  = do
          unit <- unsafeIOToST (peek p)
          A.unsafeWrite marr i unit
          fill (p `plusPtr` 2) (i + 1) marr

-- $lowlevel
--
-- Foreign functions that use UTF-16 internally may return indices in
-- units of 'Word16' instead of characters.  These functions may
-- safely be used with such indices, as they will adjust offsets if
-- necessary to preserve the validity of a Unicode string.

-- | /O(1)/ Return the prefix of the 'Text' of @n@ 'Word16' units in
-- length.
--
-- If @n@ would cause the 'Text' to end inside a surrogate pair, the
-- end of the prefix will be advanced by one additional 'Word16' unit
-- to maintain its validity.
--
-- A non-positive @n@ yields 'empty'; an @n@ (or adjusted length) that
-- reaches the end of the input returns the input unchanged.
takeWord16 :: I16 -> Text -> Text
takeWord16 (I16 n) t@(Text arr off len)
    | n <= 0               = empty
    | n >= len || m >= len = t
    | otherwise            = Text arr off m
  where
    -- The last unit of the requested prefix.  If it is a high (leading)
    -- surrogate, 0xD800..0xDBFF, its low surrogate must be kept too.
    -- Anything outside that range needs no adjustment.
    m | w < 0xD800 || w > 0xDBFF = n
      | otherwise                = n+1
    -- Only demanded once the first two guards have established
    -- 0 < n < len, so the index is within the slice.
    w = A.unsafeIndex arr (off+n-1)

-- | /O(1)/ Discard the first @n@ 'Word16' units of a 'Text' and
-- return what remains.
--
-- Should the cut fall between the two halves of a surrogate pair, one
-- extra 'Word16' unit is discarded so that the result does not start
-- with an orphaned low surrogate.
dropWord16 :: I16 -> Text -> Text
dropWord16 (I16 n) t@(Text arr off len)
    | n <= 0                 = t
    | n >= len || cut >= len = empty
    | otherwise              = Text arr (off + cut) (len - cut)
  where
    -- The final code unit that falls inside the dropped region.
    lastDropped     = A.unsafeIndex arr (off + n - 1)
    -- High (leading) surrogates occupy 0xD800..0xDBFF.
    isHighSurrogate = lastDropped >= 0xD800 && lastDropped <= 0xDBFF
    -- Number of units actually removed.
    cut             = if isHighSurrogate then n + 1 else n

-- | /O(n)/ Write every 'Word16' code unit of a 'Text' into native
-- memory, starting at the given pointer.
--
-- No bounds checking is done on the destination: the caller must
-- supply a buffer large enough for the whole 'Text'.
unsafeCopyToPtr :: Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr (Text arr off len) ptr = go ptr off
  where
    stop = off + len
    -- Store one code unit, then step the destination forward by the
    -- two bytes a 'Word16' occupies.
    go !dst !ix
      | ix == stop = return ()
      | otherwise  =
          poke dst (A.unsafeIndex arr ix) >> go (dst `plusPtr` 2) (ix + 1)

-- | /O(n)/ Run an action against a temporary, mutable copy of a
-- 'Text'.  The action receives a pointer to the copy and its length in
-- 'Word16' units.  The copy is released as soon as the action
-- returns, so the pointer must not be used afterwards.
useAsPtr :: Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr t@(Text _arr _off len) action = allocaBytes byteCount withCopy
  where
    -- Two bytes per 'Word16' code unit.
    byteCount    = len * 2
    -- Fill the scratch buffer, then hand it to the caller's action.
    withCopy buf = unsafeCopyToPtr t buf >> action (castPtr buf) (I16 len)

-- | /O(n)/ Copy a 'Text' into freshly allocated foreign memory.
-- Returns the buffer together with its length in 'Word16' units.
asForeignPtr :: Text -> IO (ForeignPtr Word16, I16)
asForeignPtr t@(Text _arr _off len) =
    mallocForeignPtrArray len >>= \buffer ->
      withForeignPtr buffer (unsafeCopyToPtr t) >>
      return (buffer, I16 len)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.