text / Data / Text / Fusion.hs

The default branch has multiple heads

Bryan O'Sullivan 9f6ffc1 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 4762cdc 


Bryan O'Sullivan 2a4faa5 
Bryan O'Sullivan 4762cdc 


Bryan O'Sullivan 7dfeb13 
Bryan O'Sullivan 4762cdc 





Bryan O'Sullivan 5cec6d4 

Bryan O'Sullivan 4762cdc 
Bryan O'Sullivan 5cec6d4 

Bryan O'Sullivan 4762cdc 

Bryan O'Sullivan 5cec6d4 

Bryan O'Sullivan 5da388d 
Bryan O'Sullivan 4762cdc 
Bryan O'Sullivan 5cec6d4 
Bryan O'Sullivan 4762cdc 

Bryan O'Sullivan 0963495 
Bryan O'Sullivan 4762cdc 

Bryan O'Sullivan a3d8ac0 
Bryan O'Sullivan 5da388d 
Bryan O'Sullivan a3d8ac0 
Bryan O'Sullivan c4397da 


Bryan O'Sullivan a3d8ac0 
Bryan O'Sullivan 5cec6d4 
Bryan O'Sullivan 4762cdc 

Bryan O'Sullivan 5cec6d4 

Bryan O'Sullivan f869536 
Bryan O'Sullivan 5cec6d4 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 046b1a9 
Bryan O'Sullivan 9c2cd37 
Bryan O'Sullivan aed9292 
Bryan O'Sullivan fa76866 
Bryan O'Sullivan 09e8479 
Bryan O'Sullivan c6bb1bb 
Bryan O'Sullivan f43a5f0 
Bryan O'Sullivan 6f79fce 
Bryan O'Sullivan bfc542c 
Bryan O'Sullivan 9f6ffc1 
Bryan O'Sullivan 046b1a9 
Bryan O'Sullivan 09e8479 
Bryan O'Sullivan 1eaf080 
Bryan O'Sullivan aed9292 
Duncan Coutts 8503dd9 


Bryan O'Sullivan 5da388d 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 046b1a9 
Duncan Coutts 8503dd9 
Bryan O'Sullivan f4099b7 
Duncan Coutts 8503dd9 
Bryan O'Sullivan dc93e96 
Duncan Coutts 8503dd9 
Bryan O'Sullivan dc93e96 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 6f79fce 

Duncan Coutts 8503dd9 

Bryan O'Sullivan 5da388d 


Bryan O'Sullivan 046b1a9 
Bryan O'Sullivan 5da388d 










Bryan O'Sullivan 53b9461 
Duncan Coutts 8503dd9 
Bryan O'Sullivan f43a5f0 
Bryan O'Sullivan a1b3e9d 
Bryan O'Sullivan 61038db 
Bryan O'Sullivan a1b3e9d 









Bryan O'Sullivan 61038db 
Bryan O'Sullivan 197e4b5 
Bryan O'Sullivan 3a20ef2 
Bryan O'Sullivan a8fac16 

Bryan O'Sullivan a1b3e9d 

Duncan Coutts 8503dd9 
Bryan O'Sullivan 16ff63b 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 017cc6b 
Duncan Coutts 8503dd9 



Bryan O'Sullivan bfc542c 
Duncan Coutts 8503dd9 

Bryan O'Sullivan 0963495 

Bryan O'Sullivan 5da388d 
Bryan O'Sullivan 046b1a9 

Bryan O'Sullivan 0963495 
Bryan O'Sullivan 046b1a9 
Bryan O'Sullivan 61038db 
Bryan O'Sullivan 5da388d 




Bryan O'Sullivan c1fe8f7 
Bryan O'Sullivan 1d91b39 
Bryan O'Sullivan 61038db 
Bryan O'Sullivan 197e4b5 
Bryan O'Sullivan 5da388d 







Bryan O'Sullivan f8c1e64 
Bryan O'Sullivan 5da388d 
Bryan O'Sullivan f8c1e64 

Bryan O'Sullivan 5da388d 
Bryan O'Sullivan f8c1e64 


Bryan O'Sullivan 0963495 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 5da388d 


Bryan O'Sullivan bbd7a6d 
Bryan O'Sullivan 5da388d 

Bryan O'Sullivan bbd7a6d 

Bryan O'Sullivan 5da388d 
Bryan O'Sullivan bbd7a6d 

Bryan O'Sullivan 5da388d 


Bryan O'Sullivan b95676b 




Nicolas Pouillar… 7938a65 
Bryan O'Sullivan b95676b 

Duncan Coutts 8503dd9 


Bryan O'Sullivan 9c2cd37 
Duncan Coutts 8503dd9 
Bryan O'Sullivan 9c2cd37 
Duncan Coutts 8503dd9 





Bryan O'Sullivan 17ec386 
Bryan O'Sullivan 469a86e 

Bryan O'Sullivan 55ac947 

Bryan O'Sullivan f869536 


Bryan O'Sullivan c4397da 






Bryan O'Sullivan 61038db 
Bryan O'Sullivan c4397da 









Bryan O'Sullivan 61038db 
Bryan O'Sullivan c4397da 






{-# LANGUAGE BangPatterns, MagicHash #-}

-- |
-- Module      : Data.Text.Fusion
-- Copyright   : (c) Tom Harper 2008-2009,
--               (c) Bryan O'Sullivan 2009-2010,
--               (c) Duncan Coutts 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com, rtomharper@googlemail.com,
--               duncan@haskell.org
-- Stability   : experimental
-- Portability : GHC
--
-- Text manipulation functions represented as fusible operations over
-- streams.
module Data.Text.Fusion
    (
    -- * Types
      Stream(..)
    , Step(..)

    -- * Creation and elimination
    , stream
    , unstream
    , reverseStream

    , length

    -- * Transformations
    , reverse

    -- * Construction
    -- ** Scans
    , reverseScanr

    -- ** Accumulating maps
    , mapAccumL

    -- ** Generation and unfolding
    , unfoldrN

    -- * Indexing
    , index
    , findIndex
    , countChar
    ) where

import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
                Num(..), Ord(..), ($), (&&),
                fromIntegral, otherwise)
import Data.Bits ((.&.))
import Data.Text.Internal (Text(..))
import Data.Text.UnsafeChar (ord, unsafeChr, unsafeWrite)
import Data.Text.UnsafeShift (shiftL, shiftR)
import qualified Data.Text.Array as A
import qualified Data.Text.Fusion.Common as S
import Data.Text.Fusion.Internal
import Data.Text.Fusion.Size
import qualified Data.Text.Internal as I
import qualified Data.Text.Encoding.Utf16 as U16
import qualified Prelude as P

default(Int)

-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
stream :: Text -> Stream Char
stream (Text arr off len) = Stream next off (maxSize len)
    where
      !end = off+len
      next !i
          | i >= end                   = Done
          | n >= 0xD800 && n <= 0xDBFF = Yield (U16.chr2 n n2) (i + 2)
          | otherwise                  = Yield (unsafeChr n) (i + 1)
          where
            n  = A.unsafeIndex arr i
            n2 = A.unsafeIndex arr (i + 1)
{-# INLINE [0] stream #-}

-- | /O(n)/ Convert a 'Text' into a 'Stream Char', but iterate
-- backwards.
reverseStream :: Text -> Stream Char
reverseStream (Text arr off len) = Stream next (off+len-1) (maxSize len)
    where
      {-# INLINE next #-}
      next !i
          | i < off                    = Done
          | n >= 0xDC00 && n <= 0xDFFF = Yield (U16.chr2 n2 n) (i - 2)
          | otherwise                  = Yield (unsafeChr n) (i - 1)
          where
            n  = A.unsafeIndex arr i
            n2 = A.unsafeIndex arr (i - 1)
{-# INLINE [0] reverseStream #-}

-- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
unstream :: Stream Char -> Text
unstream (Stream next0 s0 len) = I.textP (P.fst a) 0 (P.snd a)
  where
    a = A.run2 (A.new mlen >>= \arr -> outer arr mlen s0 0)
      where mlen = upperBound 4 len
    outer arr top = loop
      where
        loop !s !i =
            case next0 s of
              Done          -> return (arr, i)
              Skip s'       -> loop s' i
              Yield x s'
                | j >= top  -> {-# SCC "unstream/resize" #-} do
                               let top' = (top + 1) `shiftL` 1
                               arr' <- A.new top'
                               A.copyM arr' 0 arr 0 top
                               outer arr' top' s i
                | otherwise -> do d <- unsafeWrite arr i x
                                  loop s' (i+d)
                where j | ord x < 0x10000 = i
                        | otherwise       = i + 1
{-# INLINE [0] unstream #-}
{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}


-- ----------------------------------------------------------------------------
-- * Basic stream functions

length :: Stream Char -> Int
length = S.lengthI
{-# INLINE[0] length #-}

-- | /O(n)/ Reverse the characters of a string.
reverse :: Stream Char -> Text
reverse (Stream next s len0)
    | isEmpty len0 = I.empty
    | otherwise    = I.textP arr off' len'
  where
    len0' = upperBound 4 (larger len0 4)
    (arr, (off', len')) = A.run2 (A.new len0' >>= loop s (len0'-1) len0')
    loop !s0 !i !len marr =
        case next s0 of
          Done -> return (marr, (j, len-j))
              where j = i + 1
          Skip s1    -> loop s1 i len marr
          Yield x s1 | i < least -> {-# SCC "reverse/resize" #-} do
                       let newLen = len `shiftL` 1
                       marr' <- A.new newLen
                       A.copyM marr' (newLen-len) marr 0 len
                       write s1 (len+i) newLen marr'
                     | otherwise -> write s1 i len marr
            where n = ord x
                  least | n < 0x10000 = 0
                        | otherwise   = 1
                  m = n - 0x10000
                  lo = fromIntegral $ (m `shiftR` 10) + 0xD800
                  hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
                  write t j l mar
                      | n < 0x10000 = do
                          A.unsafeWrite mar j (fromIntegral n)
                          loop t (j-1) l mar
                      | otherwise = do
                          A.unsafeWrite mar (j-1) lo
                          A.unsafeWrite mar j hi
                          loop t (j-2) l mar
{-# INLINE [0] reverse #-}

-- | /O(n)/ Perform the equivalent of 'scanr' over a list, only with
-- the input and result reversed.
reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
reverseScanr f z0 (Stream next0 s0 len) = Stream next (S1 :*: z0 :*: s0) (len+1) -- HINT maybe too low
  where
    {-# INLINE next #-}
    next (S1 :*: z :*: s) = Yield z (S2 :*: z :*: s)
    next (S2 :*: z :*: s) = case next0 s of
                              Yield x s' -> let !x' = f x z
                                            in Yield x' (S2 :*: x' :*: s')
                              Skip s'    -> Skip (S2 :*: z :*: s')
                              Done       -> Done
{-# INLINE reverseScanr #-}

-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed
-- value. However, the length of the result is limited by the
-- first argument to 'unfoldrN'. This function is more efficient than
-- 'unfoldr' when the length of the result is known.
unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Stream Char
unfoldrN n = S.unfoldrNI n
{-# INLINE [0] unfoldrN #-}

-------------------------------------------------------------------------------
-- ** Indexing streams

-- | /O(n)/ stream index (subscript) operator, starting from 0.
index :: Stream Char -> Int -> Char
index = S.indexI
{-# INLINE [0] index #-}

-- | The 'findIndex' function takes a predicate and a stream and
-- returns the index of the first element in the stream
-- satisfying the predicate.
findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
findIndex = S.findIndexI
{-# INLINE [0] findIndex #-}

-- | /O(n)/ The 'count' function returns the number of times the query
-- element appears in the given stream.
countChar :: Char -> Stream Char -> Int
countChar = S.countCharI
{-# INLINE [0] countChar #-}

-- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
-- function to each element of a 'Text', passing an accumulating
-- parameter from left to right, and returns a final 'Text'.
mapAccumL :: (a -> Char -> (a,Char)) -> a -> Stream Char -> (a, Text)
mapAccumL f z0 (Stream next0 s0 len) = (nz,I.textP na 0 nl)
  where
    (na,(nz,nl)) = A.run2 (A.new mlen >>= \arr -> outer arr mlen z0 s0 0)
      where mlen = upperBound 4 len
    outer arr top = loop
      where
        loop !z !s !i =
            case next0 s of
              Done          -> return (arr, (z,i))
              Skip s'       -> loop z s' i
              Yield x s'
                | j >= top  -> {-# SCC "mapAccumL/resize" #-} do
                               let top' = (top + 1) `shiftL` 1
                               arr' <- A.new top'
                               A.copyM arr' 0 arr 0 top
                               outer arr' top' z s i
                | otherwise -> do let (z',c) = f z x
                                  d <- unsafeWrite arr i c
                                  loop z' s' (i+d)
                where j | ord x < 0x10000 = i
                        | otherwise       = i + 1
{-# INLINE [0] mapAccumL #-}
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.