Commits

Bryan O'Sullivan committed 259a5bc

Record a sort-of-working fast search.

Comments (0)

Files changed (3)

Data/Text/Search.hs

+{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
+
+module Data.Text.Search where
+
+import qualified Data.Text as T ()
+import qualified Data.Text.Array as A
+import Data.Word (Word64)
+import Data.Text.Internal (Text(..))
+import Data.Bits ((.|.), (.&.))
+import Data.Text.UnsafeShift (shiftL)
+
+indices :: Text -> Text -> [Int]
+indices _needle@(Text narr noff nlen) _haystack@(Text harr hoff hlen)
+  | ldiff < 0 = []
+  | otherwise = outer 0
+  where
+    ldiff   = hlen - nlen
+    nlast   = nlen - 1
+    z       = nindex nlast
+    nindex k = A.unsafeIndex narr (noff+k)
+    hindex k = A.unsafeIndex harr (hoff+k)
+    (mask :: Word64, skip :: Int) = buildTable 0 0 (nlen-2)
+    buildTable !i !msk !skp
+        | i >= nlast           = (msk .|. swizzle z, skp)
+        | otherwise            = buildTable (i+1) (msk .|. swizzle c) skp'
+        where c                = nindex i
+              skp' | c == z    = nlen - i - 2
+                   | otherwise = skp
+    swizzle k = 1 `shiftL` (fromEnum k .&. 0x3f)
+    outer !i
+        | i > ldiff = []
+        | c == z    = if candidateMatch 0
+                      then i : outer (i + nlast)
+                      else if nextInPattern
+                           then outer (i + nlen)
+                           else outer (i + skip)
+        | nextInPattern = outer (i + nlen)
+        | otherwise     = outer (i + 1)
+        where c = hindex (i+nlast)
+              candidateMatch j
+                  | j >= nlast               = True
+                  | hindex (i+j) /= nindex j = False
+                  | otherwise                = candidateMatch (j+1)
+              nextInPattern = (mask .&. swizzle (hindex (i+nlen))) == 0

Data/Text/UnsafeShift.hs

     {-# INLINE shiftR #-}
     shiftR (W32# x#) (I# i#) = W32# (x# `uncheckedShiftRL#` i#)
 
-{-
 instance UnsafeShift Word64 where
     {-# INLINE shiftL #-}
     shiftL (W64# x#) (I# i#) = W64# (x# `uncheckedShiftL64#` i#)
 
     {-# INLINE shiftR #-}
     shiftR (W64# x#) (I# i#) = W64# (x# `uncheckedShiftRL64#` i#)
--}
 
 instance UnsafeShift Int where
     {-# INLINE shiftL #-}
     Data.Text.Foreign
     Data.Text.Lazy
     Data.Text.Lazy.Encoding
+    Data.Text.Search
   other-modules:
     Data.Text.Array
     Data.Text.Encoding.Fusion