-- > import Data.SuffixTree (STree)

-- > import qualified Data.SuffixTree as T

--- The implementation is based on the first o~~n~~e described in the

+-- The implementation is based on the first of those described in the

-- * Robert Giegerich and Stefan Kurtz, \"/A comparison of

-- imperative and purely functional suffix tree constructions/\",

-- Science of Computer Programming 25(2-3):187-218, 1995,

-- <http://citeseer.ist.psu.edu/giegerich95comparison.html>

+-- This implementation constructs the suffix tree lazily, so subtrees

+-- are not created until they are traversed. Two construction

+-- functions are provided, 'constructWith' for sequences composed of

+-- small alphabets, and 'construct' for larger alphabets.

+ -- * Other useful functions

-import Prelude hiding (elem~~, length, take~~)

+import Prelude hiding (elem)

import qualified Data.Map as M

import Data.List (foldl')

import Control.Arrow (second)

import qualified Data.ByteString as SB

import qualified Data.ByteString.Lazy as LB

-import ~~qualified Data.List as L~~

+import Data.Maybe (listToMaybe, mapMaybe)

+-- | The length of a prefix list. This type is formulated to do cheap

+-- work eagerly (to avoid constructing a pile of deferred thunks),

+-- while deferring potentially expensive work.

data Length a = Exactly {-# UNPACK #-} !Int

| Sum {-# UNPACK #-} !Int [a]

-length :: Length a -> Int

-length (Sum n xs) = n + L.length xs

+-- | The list of symbols that 'constructWith' can possibly see in its

+-- | The prefix string associated with an 'Edge'.

+newtype Prefix a = Prefix ([a], Length a)

+instance (Eq a) => Eq (Prefix a) where

+ a == b = prefix a == prefix b

+type EdgeFunction a = [[a]] -> (Length a, [[a]])

+-- | A suffix tree. The implementation is exposed to ease the

+-- development of custom traversal functions. Note that @('Prefix' a,

+-- 'STree' a)@ pairs are not stored in any order.

+data STree a = Node [(Prefix a, STree a)]

+-- | Obtain the list stored in a 'Prefix'.

+prefix :: Prefix a -> [a]

+prefix (Prefix (ys, Exactly n)) = take n ys

+prefix (Prefix (ys, Sum n xs)) = tk n ys

+ where tk 0 ys = zipWith (const id) xs ys

+ tk n (y:ys) = y : tk (n-1) ys

+-- | /O(n)/. Fold the edges in a tree, from bottom to top. Suitable

+fold :: (Prefix a -> b -> b) -> b -> STree a -> b

+fold f z (Node es) = foldr (\(e, t) v -> f e (fold f v t)) z es

+-- | /O(n)/. Fold the edges in a tree, from bottom to top. Suitable

+fold' :: (a -> Prefix b -> a) -> a -> STree b -> a

+fold' f z (Node es) = foldl' (\v (e, t) -> f (fold' f v t) e) z es

+-- | Increment the length of a prefix.

inc :: Length a -> Length a

inc (Exactly n) = Exactly (n+1)

inc (Sum n xs) = Sum (n+1) xs

-take :: Length a -> [a] -> [a]

-take (Exactly n) = L.take n

- where tk 0 ys = zipWith (const id) xs ys

- tk n (y:ys) = y : tk (n-1) ys

-type Edge a = ([a], Length a)

-type EdgeFunction a = [[a]] -> (Length a, [[a]])

-data STree a = Node [(Edge a, STree a)]

-fold :: ([a] -> b -> b) -> b -> STree a -> b

-fold f z (Node es) = foldr (\ ((l, n), t) v -> f (take n l) (fold f v t)) z es

-lazyTreeWith :: (Eq a) => EdgeFunction a -> [a] -> [a] -> STree a

+lazyTreeWith :: (Eq a) => EdgeFunction a -> Alphabet a -> [a] -> STree a

lazyTreeWith edge alphabet = suf . suffixes

- suf ss = Node [((a:sa, inc cpl), suf ssr)

+ suf ss = Node [(Prefix (a:sa, inc cpl), suf ssr)

n@(sa:_) <- [ss `clusterBy` a],

clusterBy ss a = [cs | c:cs <- ss, c == a]

+-- | Return all non-empty suffixes of the argument, longest first.

+-- >suffixes xs == init (tails xs)

suffixes xs@(_:xs') = xs : suffixes xs'

lazyTree :: (Ord a) => EdgeFunction a -> [a] -> STree a

lazyTree edge = suf . suffixes

- suf ss = Node [((a:sa, inc cpl), suf ssr)

+ suf ss = Node [(Prefix (a:sa, inc cpl), suf ssr)

| (a, n@(sa:_)) <- suffixMap ss,

cst [s] = (Sum 0 s, [[]])

| null [c | c:_ <- ss, a /= c] = let cpl' = inc cpl

- in cpl'~~ `seq`~~ (cpl', rss)

+ in seq cpl' (cpl', rss)

| otherwise = (Exactly 0, awss)

where (cpl, rss) = cst (w:[u | _:u <- ss])

{-# SPECIALISE constructWith :: [LB.ByteString] -> [LB.ByteString]

-> STree LB.ByteString #-}

{-# SPECIALISE constructWith :: (Eq a) => [[a]] -> [[a]] -> STree [a] #-}

-constructWith :: (Eq a) => [a] -> [a] -> STree a

+-- | /O(k n log n)/. Construct a suffix tree using the given

+-- alphabet. The performance of this function is linear in the size

+-- /k/ of the alphabet. That makes this function suitable for small

+-- alphabets, such as DNA nucleotides. For an alphabet containing

+-- more than a few symbols, 'construct' is usually several orders of

+constructWith :: (Eq a) => Alphabet a -> [a] -> STree a

constructWith = lazyTreeWith cst

{-# SPECIALISE construct :: [Char] -> STree Char #-}

{-# SPECIALISE construct :: [SB.ByteString] -> STree SB.ByteString #-}

{-# SPECIALISE construct :: [LB.ByteString] -> STree LB.ByteString #-}

{-# SPECIALISE construct :: (Ord a) => [[a]] -> STree [a] #-}

+-- | /O(n log n)/. Construct a suffix tree.

construct :: (Ord a) => [a] -> STree a

+suffix :: (Eq a) => [a] -> [a] -> Maybe [a]

+suffix (l:ls) (x:xs) | l == x = suffix ls xs

{-# SPECIALISE elem :: [Char] -> STree Char -> Bool #-}

{-# SPECIALISE elem :: [[Char]] -> STree [Char] -> Bool #-}

{-# SPECIALISE elem :: [SB.ByteString] -> STree SB.ByteString -> Bool #-}

{-# SPECIALISE elem :: [LB.ByteString] -> STree LB.ByteString -> Bool #-}

{-# SPECIALISE elem :: (Eq a) => [[a]] -> STree [a] -> Bool #-}

+-- | /O(n)/. Indicate the suffix tree contains the given subsequence.

+-- Performance is linear in the length of the subsequence.

elem :: (Eq a) => [a] -> STree a -> Bool

-elem xs (Node es) = any prefix es

- where prefix ((l, n), t) = maybe False (`elem` t) (rsuf (take n l) xs)

- rsuf (l:ls) (x:xs) | l == x = rsuf ls xs

+elem xs (Node es) = any pfx es

+ where pfx (e, t) = maybe False (`elem` t) (suffix (prefix e) xs)

+{-# SPECIALISE find :: [Char] -> STree Char

+ -> Maybe (Prefix Char, STree Char) #-}

+{-# SPECIALISE find :: [[Char]] -> STree [Char]

+ -> Maybe (Prefix [Char], STree [Char]) #-}

+{-# SPECIALISE find :: [SB.ByteString] -> STree SB.ByteString

+ -> Maybe (Prefix SB.ByteString, STree SB.ByteString) #-}

+{-# SPECIALISE find :: [LB.ByteString] -> STree LB.ByteString

+ -> Maybe (Prefix LB.ByteString, STree LB.ByteString) #-}

+{-# SPECIALISE find :: (Eq a) => [[a]] -> STree [a]

+ -> Maybe (Prefix [a], STree [a]) #-}

+-- | /O(n)/. Return the portion of the suffix tree at which the given

+-- subsequence is located. If the subsequence is not found, return

+find :: (Eq a) => [a] -> STree a -> Maybe (Prefix a, STree a)

+find xs (Node es) = listToMaybe (mapMaybe pfx es)

+ where pfx p@(e, t) = suffix (prefix e) xs >>= \suf ->