{-# LANGUAGE BangPatterns #-}

{-# OPTIONS_GHC -fno-warn-orphans #-}

+-- Copyright : (c) Tom Harper 2008-2009,

+-- (c) Bryan O'Sullivan 2009,

+-- (c) Duncan Coutts 2009

+-- Maintainer : rtharper@aftereternity.co.uk, bos@serpentine.com,

+-- Stability : experimental

+-- A time and space-efficient implementation of Unicode text using

+-- packed Word16 arrays. Suitable for performance critical use, both

+-- in terms of large data quantities and high speed.

+-- This module is intended to be imported @qualified@, to avoid name

+-- clashes with "Prelude" functions, e.g.

+-- > import qualified Data.Text as T

+ -- * Creation and elimination

+ -- ** Breaking into lines and words

+ -- * Zipping and unzipping

(&&),(||),(+),(-),(<),(>),(<=),(>=),(.),(>>=),

import Data.Char (isSpace)

import Control.Monad.ST(ST)

import Data.Array.Base(unsafeNewArray_,unsafeWrite,unsafeAt)

import Data.Text.UnsafeChar(unsafeChr)

import qualified Data.Text.Utf16 as U16

+-- Most of the functions in this module are subject to /array fusion/,

+-- meaning that a pipeline of functions will usually allocate at most

t1 == t2 = (stream t1) `S.eq` (stream t2)

-- -----------------------------------------------------------------------------

-- * Conversion to/from 'Text'

--- | /O(n)/ Convert a String into a Text.

+-- | /O(n)/ Convert a 'String' into a 'Text'.

--- This function is subject to array fusion, so calling other fusible

--- function(s) on a packed string will only cause one 'Text' to be written

--- out at the end of the pipeline, instead of one before and one after.

+-- This function is subject to array fusion.

pack str = (unstream (stream_list str))

S.Yield x s' -> x : unfold s'

{-# INLINE [1] unpack #-}

--- | Convert a character into a Text.

+-- | /O(1)/ Convert a character into a Text.

-- Subject to array fusion.

singleton :: Char -> Text

singleton c = unstream (Stream next (c:[]) 1)

-- -----------------------------------------------------------------------------

--- | /O(n)/ Adds a character to the front of a 'Text'. This function is more

--- costly than its 'List' counterpart because it requires copying a new array.

--- Subject to array fusion.

+-- | /O(n)/ Adds a character to the front of a 'Text'. This function

+-- is more costly than its 'List' counterpart because it requires

+-- copying a new array. Subject to array fusion.

cons :: Char -> Text -> Text

cons c t = unstream (S.cons c (stream t))

--- | /O(n)/ Adds a character to the end of a 'Text'. This copies the entire

--- array in the process.

--- Subject to array fusion.

+-- | /O(n)/ Adds a character to the end of a 'Text'. This copies the

+-- entire array in the process. Subject to array fusion.

snoc :: Text -> Char -> Text

snoc t c = unstream (S.snoc (stream t) c)

--- | /O(n)/ Appends one Text to the other by copying both of them into a new

--- Subject to array fusion

+-- | /O(n)/ Appends one 'Text' to the other by copying both of them

+-- into a new 'Text'. Subject to array fusion.

append :: Text -> Text -> Text

append (Text arr1 off1 len1) (Text arr2 off2 len2) = Text (runSTUArray x) 0 len

unstream (S.append (stream t1) (stream t2)) = append t1 t2

--- | /O(1)/ Returns the first character of a Text, which must be non-empty.

--- Subject to array fusion.

+-- | /O(1)/ Returns the first character of a 'Text', which must be

+-- non-empty. Subject to array fusion.

head t = S.head (stream t)

--- | /O(n)/ Returns the last character of a Text, which must be non-empty.

--- Subject to array fusion.

+-- | /O(n)/ Returns the last character of a 'Text', which must be

+-- non-empty. Subject to array fusion.

| len <= 0 = errorEmptyList "last"

--- | /O(1)/ Returns all characters after the head of a Text, which must

--- Subject to array fusion.

+-- | /O(1)/ Returns all characters after the head of a 'Text', which

+-- must be non-empty. Subject to array fusion.

| len <= 0 = errorEmptyList "tail"

--- | /O(1)/ Returns all but the last character of a Text, which

--- Subject to array fusion.

+-- | /O(1)/ Returns all but the last character of a 'Text', which must

+-- be non-empty. Subject to array fusion.

init (Text arr off len) | len <= 0 = errorEmptyList "init"

| n >= 0xDC00 && n <= 0xDFFF = Text arr off (len-2)

unstream (S.init (stream t)) = init t

--- | /O(1)/ Tests whether a Text is empty or not.

--- Subject to array fusion.

+-- | /O(1)/ Tests whether a 'Text' is empty or not. Subject to array

null t = S.null (stream t)

--- | /O(n)/ Returns the number of characters in a ~~t~~ext.

+-- | /O(n)/ Returns the number of characters in a 'Text'.

-- Subject to array fusion.

length (Text _arr _off len) = len

-- -----------------------------------------------------------------------------

--- | /O(n)/ 'map' @f @xs is the Text obtained by applying @f@ to each

--- Subject to array fusion.

+-- | /O(n)/ 'map' @f @xs is the 'Text' obtained by applying @f@ to

+-- each element of @xs@. Subject to array fusion.

map :: (Char -> Char) -> Text -> Text

map f t = unstream (S.map f (stream t))

--- | /O(n)/ The 'intersperse' function takes a character and places it between

--- the characters of a Text.

--- Subject to array fusion.

+-- | /O(n)/ The 'intersperse' function takes a character and places it

+-- between the characters of a 'Text'. Subject to array fusion.

intersperse :: Char -> Text -> Text

intersperse c t = unstream (S.intersperse c (stream t))

{-# INLINE intersperse #-}

--- | /O(n)/ The 'transpose' function transposes the rows and columns of its

--- Text argument. Note that this function uses pack, unpack, and the 'List'

--- version of transpose and is thus not very efficient.

+-- | /O(n)/ The 'transpose' function transposes the rows and columns

+-- of its 'Text' argument. Note that this function uses 'pack',

+-- 'unpack', and the 'List' version of transpose and is thus not very

transpose :: [Text] -> [Text]

transpose ts = P.map pack (L.transpose (P.map unpack ts))

-- -----------------------------------------------------------------------------

-- * Reducing 'Text's (folds)

--- | 'foldl', applied to a binary operator, a starting value (typically the

--- left-identity of the operator), and a Text, reduces the Text using the

--- binary operator, from left to right.

+-- | 'foldl', applied to a binary operator, a starting value

+-- (typically the left-identity of the operator), and a 'Text',

+-- reduces the 'Text' using the binary operator, from left to right.

-- Subject to array fusion.

foldl :: (b -> Char -> b) -> b -> Text -> b

foldl f z t = S.foldl f z (stream t)

foldl' f z t = S.foldl' f z (stream t)

--- | 'foldl1' is a variant of 'foldl' that has no starting value argument,

--- and thus must be applied to non-empty 'Text's.

--- Subject to array fusion.

+-- | A variant of 'foldl' that has no starting value argument, and

+-- thus must be applied to a non-empty 'Text'. Subject to array

foldl1 :: (Char -> Char -> Char) -> Text -> Char

foldl1 f t = S.foldl1 f (stream t)

foldl1' f t = S.foldl1' f (stream t)

--- | 'foldr', applied to a binary operator, a starting value (typically the

--- right-identity of the operator), and a Text, reduces the Text using the

--- binary operator, from right to left.

+-- | 'foldr', applied to a binary operator, a starting value

+-- (typically the right-identity of the operator), and a 'Text',

+-- reduces the 'Text' using the binary operator, from right to left.

-- Subject to array fusion.

foldr :: (Char -> b -> b) -> b -> Text -> b

foldr f z t = S.foldr f z (stream t)

--- | 'foldr1' is a variant of 'foldr' that has no starting value argument,

--- and thust must be applied to non-empty 'Text's.

--- Subject to array fusion.

+-- | A variant of 'foldr' that has no starting value argument, and

+-- thust must be applied to a non-empty 'Text'. Subject to array

foldr1 :: (Char -> Char -> Char) -> Text -> Char

foldr1 f t = S.foldr1 f (stream t)

concat ts = unstream (S.concat (L.map stream ts))

--- | Map a function over a Text that results in a Text and concatenate the

--- results. This function is subject to array fusion, and note that if in

--- 'concatMap' @f @xs, @f@ is defined in terms of fusible functions it will

+-- | Map a function over a 'Text' that results in a 'Text', and concatenate the

+-- results. This function is subject to array fusion.

+-- Note: if in 'concatMap' @f @xs, @f@ is defined in terms of fusible

+-- functions, it will also be fusible.

concatMap :: (Char -> Text) -> Text -> Text

concatMap f t = unstream (S.concatMap (stream . f) (stream t))

--- | 'any' @p @xs determines if any character in the 'Text' @xs@ satisifes the

--- predicate @p@. Subject to array fusion.

+-- | 'any' @p @xs determines whether any character in the 'Text' @xs@

+-- satisifes the predicate @p@. Subject to array fusion.

any :: (Char -> Bool) -> Text -> Bool

any p t = S.any p (stream t)

--- | 'all' @p @xs determines if all characters in the 'Text' @xs@ satisify the

--- predicate @p@. Subject to array fusion.

+-- | 'all' @p @xs determines whether all characters in the 'Text' @xs@

+-- satisify the predicate @p@. Subject to array fusion.

all :: (Char -> Bool) -> Text -> Bool

all p t = S.all p (stream t)

--- | /O(n)/ 'maximum' returns the maximum value from a 'Text', which must be

--- non-empty. Subject to array fusion.

+-- | /O(n)/ 'maximum' returns the maximum value from a 'Text', which

+-- must be non-empty. Subject to array fusion.

maximum t = S.maximum (stream t)

--- | /O(n)/ 'minimum' returns the minimum value from a 'Text', which must be

--- non-empty. Subject to array fusion.

+-- | /O(n)/ 'minimum' returns the minimum value from a 'Text', which

+-- must be non-empty. Subject to array fusion.

minimum t = S.minimum (stream t)

-- -----------------------------------------------------------------------------

-- ** Generating and unfolding 'Text's

--- /O(n)/, where @n@ is the length of the result. The unfoldr function

--- is analogous to the List 'unfoldr'. unfoldr builds a Text

--- from a seed value. The function takes the element and returns

--- Nothing if it is done producing the Text or returns Just

--- (a,b), in which case, a is the next Char in the string, and b is

--- the seed value for further production.

+-- | /O(n)/, where @n@ is the length of the result. The 'unfoldr'

+-- function is analogous to the List 'L.unfoldr'. 'unfoldr' builds a

+-- 'Text' from a seed value. The function takes the element and

+-- returns 'Nothing' if it is done producing the 'Text', otherwise

+-- 'Just' @(a,b)@. In this case, @a@ is the next 'Char' in the

+-- string, and @b@ is the seed value for further production.

unfoldr :: (a -> Maybe (Char,a)) -> a -> Text

unfoldr f s = unstream (S.unfoldr f s)

--- O(n) Like unfoldr, unfoldrN builds a Text from a seed

+-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a 'Text' from a seed

-- value. However, the length of the result should be limited by the

--- first argument to unfoldrN. This function is more efficient than

--- unfoldr when the maximum length of the result and correct,

--- otherwise its complexity performance is similar to 'unfoldr'

+-- first argument to 'unfoldrN'. This function is more efficient than

+-- 'unfoldr' when the maximum length of the result is known and

+-- correct, otherwise its performance is similar to 'unfoldr'.

unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Text

unfoldrN n f s = unstream (S.unfoldrN n f s)

-- -----------------------------------------------------------------------------

--- O(n) 'take' @n, applied to a Text, returns the prefix of the

--- Text of length n, or the Text itself if n is greater than the

+-- /O(n) 'take' @n@, applied to a 'Text', returns the prefix of the

+-- 'Text' of length @n@, or the 'Text' itself if @n@ is greater than

+-- the length of the Text.

take :: Int -> Text -> Text

-take n (Text arr off len) = Text arr off (loop off 0)

+take n t@(Text arr off len)

+ | otherwise = Text arr off (loop off 0)

| i >= end || count >= n = i - off

unstream (S.take n (stream t)) = take n t

--- /O(n)/ 'drop' @n, applied to a Text, returns the suffix of the

--- Text of length @n, or the empty Text if @n is greater than the

+-- | /O(n)/ 'drop' @n@, applied to a 'Text', returns the suffix of the

+-- 'Text' of length @n@, or the empty 'Text' if @n@ is greater than the

+-- length of the 'Text'.

drop :: Int -> Text -> Text

-drop n (Text arr off len) = (Text arr newOff newLen)

+drop n t@(Text arr off len)

+ | otherwise = Text arr newOff newLen

(newOff, newLen) = loop off 0 len

unstream (S.drop n (stream t)) = drop n t

--- | 'takeWhile', applied to a predicate @p@ and a stream, returns the

--- longest prefix (possibly empty) of elements that satisfy p.

+-- | 'takeWhile', applied to a predicate @p@ and a 'Text', returns the

+-- longest prefix (possibly empty) of elements that satisfy @p@.

takeWhile :: (Char -> Bool) -> Text -> Text

takeWhile p t = unstream (S.takeWhile p (stream t))

--- | 'dropWhile' @p @xs returns the suffix remaining after 'takeWhile' @p @xs.

+-- | 'dropWhile' @p@ @xs@ returns the suffix remaining after

+-- 'takeWhile' @p@ @xs@.

dropWhile :: (Char -> Bool) -> Text -> Text

dropWhile p t = unstream (S.dropWhile p (stream t))