Bryan O'Sullivan committed f702282

Account for the new I16 type in the text package.

Comments (0)

Files changed (5)

Data/Text/ICU/Break/IO.hsc

-{-# LANGUAGE BangPatterns, EmptyDataDecls, ForeignFunctionInterface,
-    RecordWildCards #-}
+{-# LANGUAGE BangPatterns, ForeignFunctionInterface, RecordWildCards #-}
 -- |
 -- Module      : Data.Text.ICU.Break.IO
 -- Copyright   : (c) 2010 Bryan O'Sullivan
     , breakWord
     , setText
     -- * Iteration functions
+    -- $indices
     , current
     , first
     , last
 #include <unicode/ubrk.h>
 
 import Control.Monad (forM)
-import Data.IORef (IORef, newIORef, writeIORef)
+import Data.IORef (newIORef, writeIORef)
 import Data.Int (Int32)
 import Data.Text (Text)
-import Data.Text.Foreign (useAsPtr)
+import Data.Text.Foreign (I16, useAsPtr)
+import Data.Text.ICU.Break.Types (BreakIterator(..), UBreakIterator)
 import Data.Text.ICU.Error.Internal (UErrorCode, handleError)
 import Data.Text.ICU.Internal (LocaleName(..), UBool, UChar, asBool, withLocaleName)
 import Foreign.C.String (CString, peekCString)
 import Foreign.C.Types (CInt)
-import Foreign.ForeignPtr (ForeignPtr, newForeignPtr, withForeignPtr)
+import Foreign.ForeignPtr (newForeignPtr, withForeignPtr)
 import Foreign.Marshal.Array (allocaArray, peekArray)
 import Foreign.Ptr (FunPtr, Ptr, nullPtr)
 import Prelude hiding (last)
 import System.IO.Unsafe (unsafePerformIO)
 
+-- $indices
+--
+-- /Important note/: All of the indices accepted and returned by
+-- functions in this module are offsets into the raw UTF-16 text
+-- array, /not/ a count of code points.
+
 -- | Line break status.
 data Line = Soft                -- ^ A soft line break is a position at
                                 -- which a line break is acceptable, but not
           | Ideograph           -- ^ A word containing ideographic characters.
             deriving (Eq, Show, Enum)
 
--- A boundary breaker.
-data BreakIterator a = BR {
-      brText :: IORef Text
-    , brStatus :: Int32 -> a
-    , brIter :: ForeignPtr UBreakIterator
-    }
-
 -- | Break a string on character boundaries.
 --
 -- Character boundary analysis identifies the boundaries of "Extended
             else Just $! fromIntegral i
 
 -- | Reset the breaker to the beginning of the text to be scanned.
-first :: BreakIterator a -> IO Int
+first :: BreakIterator a -> IO I16
 first BR{..} = fromIntegral `fmap` withForeignPtr brIter ubrk_first
 
 -- | Reset the breaker to the end of the text to be scanned.
-last :: BreakIterator a -> IO Int
+last :: BreakIterator a -> IO I16
 last BR{..} = fromIntegral `fmap` withForeignPtr brIter ubrk_last
 
 -- | Advance the iterator and break at the text boundary that follows the
 -- current text boundary.
-next :: BreakIterator a -> IO (Maybe Int)
+next :: BreakIterator a -> IO (Maybe I16)
 next = asIndex ubrk_next
 
 -- | Advance the iterator and break at the text boundary that precedes the
 -- current text boundary.
-previous :: BreakIterator a -> IO (Maybe Int)
+previous :: BreakIterator a -> IO (Maybe I16)
 previous = asIndex ubrk_previous
 
 -- | Determine the text boundary preceding the specified offset.
-preceding :: BreakIterator a -> Int -> IO (Maybe Int)
+preceding :: BreakIterator a -> Int -> IO (Maybe I16)
 preceding bi i = asIndex (flip ubrk_preceding (fromIntegral i)) bi
 
 -- | Determine the text boundary following the specified offset.
-following :: BreakIterator a -> Int -> IO (Maybe Int)
+following :: BreakIterator a -> Int -> IO (Maybe I16)
 following bi i = asIndex (flip ubrk_following (fromIntegral i)) bi
 
 -- | Return the index (an offset into the raw UTF-16 text array, not a
 -- count of code points) most recently returned by 'next', 'previous',
 -- 'first', or 'last'.
-current :: BreakIterator a -> IO (Maybe Int)
+current :: BreakIterator a -> IO (Maybe I16)
 current = asIndex ubrk_current
 
 -- | Return the status from the break rule that determined the most recently
 {-# NOINLINE available #-}
 
 type UBreakIteratorType = CInt
-data UBreakIterator
 
 foreign import ccall unsafe "hs_text_icu.h __hs_ubrk_open" ubrk_open
     :: UBreakIteratorType -> CString -> Ptr UChar -> Int32 -> Ptr UErrorCode

Data/Text/ICU/Break/Types.hs

+{-# LANGUAGE EmptyDataDecls #-}
+-- |
+-- Module      : Data.Text.ICU.Break.Types
+-- Copyright   : (c) 2010 Bryan O'Sullivan
+--
+-- License     : BSD-style
+-- Maintainer  : bos@serpentine.com
+-- Stability   : experimental
+-- Portability : GHC
+
+module Data.Text.ICU.Break.Types
+    (
+      BreakIterator(..)
+    , UBreakIterator
+    ) where
+
+import Data.IORef (IORef)
+import Data.Int (Int32)
+import Data.Text (Text)
+import Foreign.ForeignPtr (ForeignPtr)
+
+-- A boundary breaker.
+data BreakIterator a = BR {
+      brText :: IORef Text
+    , brStatus :: Int32 -> a
+    , brIter :: ForeignPtr UBreakIterator
+    }
+
+data UBreakIterator

Data/Text/ICU/Normalize.hsc

           let newLen' = fromIntegral newLen
           if newLen' > dlen
             then return (Left newLen')
-            else Right `fmap` fromPtr dptr newLen'
-  in loop slen
+            else Right `fmap` fromPtr dptr (fromIntegral newLen')
+  in loop (fromIntegral slen)
     
       
 -- | Perform an efficient check on a string, to quickly determine if

Data/Text/ICU/Text.hs

                                   (fromIntegral slen) opts
           if n > len
             then go n
-            else fromPtr dptr n
-    go slen
+            else fromPtr dptr (fromIntegral n)
+    go (fromIntegral slen)
 
 type CaseMapper = Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> CString
                 -> Ptr UErrorCode -> IO Int32
                               (fromIntegral slen) locale
             if n > len
               then go n
-              else fromPtr dptr n
-      go slen
+              else fromPtr dptr (fromIntegral n)
+      go (fromIntegral slen)
 
 -- | Lowercase the characters in a string.
 --
   README include/hs_text_icu.h
 
 library
-  build-depends:     base < 5, bytestring, text == 0.8.*
+  build-depends:     base < 5, bytestring, text == 0.9.*
   if impl(ghc >= 6.10)
     build-depends:   base >= 4
 
       Data.Text.ICU.Normalize
       Data.Text.ICU.Types
   other-modules:
+      Data.Text.ICU.Break.Types
       Data.Text.ICU.Collate.Internal
       Data.Text.ICU.Collate.Pure
       Data.Text.ICU.Convert.Internal
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.