Commits

Jasper Van der Jeugt committed d618f12

FileIndices → Search, and purify the benchmark

Comments (0)

Files changed (3)

tests/benchmarks/src/Data/Text/Benchmarks.hs

 import qualified Data.Text.Benchmarks.DecodeUtf8 as DecodeUtf8
 import qualified Data.Text.Benchmarks.EncodeUtf8 as EncodeUtf8
 import qualified Data.Text.Benchmarks.Equality as Equality
-import qualified Data.Text.Benchmarks.FileIndices as FileIndices
 import qualified Data.Text.Benchmarks.FileRead as FileRead
 import qualified Data.Text.Benchmarks.FoldLines as FoldLines
 import qualified Data.Text.Benchmarks.HtmlCombinator as HtmlCombinator
 import qualified Data.Text.Benchmarks.Pure as Pure
 import qualified Data.Text.Benchmarks.ReadNumbers as ReadNumbers
 import qualified Data.Text.Benchmarks.Replace as Replace
+import qualified Data.Text.Benchmarks.Search as Search
 import qualified Data.Text.Benchmarks.WordCount as WordCount
 
 import qualified Data.Text.Benchmarks.Programs.Cut as Programs.Cut
         , DecodeUtf8.benchmark (tf "russian.txt")
         , EncodeUtf8.benchmark sink "επανάληψη 竺法蘭共譯"
         , Equality.benchmark (tf "japanese.txt")
-        , FileIndices.benchmark (tf "russian.txt") "принимая"
         , FileRead.benchmark (tf "russian.txt")
         , FoldLines.benchmark (tf "russian.txt")
         , HtmlCombinator.benchmark sink
         , Pure.benchmark (tf "japanese.txt")
         , ReadNumbers.benchmark (tf "numbers.txt")
         , Replace.benchmark (tf "russian.txt") sink "принимая" "своем"
+        , Search.benchmark (tf "russian.txt") "принимая"
         , WordCount.benchmark (tf "russian.txt")
         ]
 

tests/benchmarks/src/Data/Text/Benchmarks/FileIndices.hs

--- | Search for a pattern in a file, find the number of occurences
---
-module Data.Text.Benchmarks.FileIndices
-    ( benchmark
-    ) where
-
-import Control.Exception (evaluate)
-import Criterion (Benchmark, bench, bgroup)
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Search as BL
-import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
-import qualified Data.Text.Lazy.IO as TL
-
-benchmark :: FilePath -> TL.Text -> IO Benchmark
-benchmark fp t = return $ bgroup "FileIndices"
-    [ bench "LazyText"           $ TL.readFile fp >>= evaluate . text t
-    , bench "LazyByteString"     $ BL.readFile fp >>= evaluate . byteString b
-    ]
-  where
-    b = B.concat $ BL.toChunks $ TL.encodeUtf8 t
-
-text :: TL.Text -> TL.Text -> Int
-text needle = fromIntegral . TL.count needle
-
-byteString :: B.ByteString -> BL.ByteString -> Int
-byteString needle = length . BL.indices needle

tests/benchmarks/src/Data/Text/Benchmarks/Search.hs

+-- | Search for a pattern in a file and count the number of occurrences
+--
+module Data.Text.Benchmarks.Search
+    ( benchmark
+    ) where
+
+import Criterion (Benchmark, bench, bgroup, whnf)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Lazy.Search as BL
+import qualified Data.ByteString.Search as B
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import qualified Data.Text.IO as T
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.IO as TL
+
+-- | Build the benchmark group for pattern searching.
+--
+-- The file at @fp@ is read once per representation (strict and lazy
+-- 'B.ByteString', strict and lazy 'T.Text'); each benchmark then counts how
+-- often @needleT@ occurs in the corresponding input. The needle is converted
+-- to UTF-8 bytes for the ByteString variants and to lazy text for the
+-- lazy 'TL.Text' variant.
+benchmark :: FilePath -> T.Text -> IO Benchmark
+benchmark fp needleT = do
+    b  <- B.readFile fp
+    bl <- BL.readFile fp
+    t  <- T.readFile fp
+    tl <- TL.readFile fp
+    -- The lazy readers defer the actual file I/O until the contents are
+    -- demanded. Walk both lazy inputs once here so that the reading happens
+    -- up front rather than inside the first timed run.
+    BL.length bl `seq` TL.length tl `seq` return ()
+    -- The group is named after this module (previously "FileIndices").
+    return $ bgroup "Search"
+        [ bench "ByteString"     $ whnf (byteString needleB)     b
+        , bench "LazyByteString" $ whnf (lazyByteString needleB) bl
+        , bench "Text"           $ whnf (text needleT)           t
+        , bench "LazyText"       $ whnf (lazyText needleTL)      tl
+        ]
+  where
+    -- UTF-8 encoded needle, shared by both ByteString benchmarks.
+    needleB = T.encodeUtf8 needleT
+    -- Single-chunk lazy version of the needle.
+    needleTL = TL.fromChunks [needleT]
+
+-- | Number of positions that 'B.indices' reports for @needle@ in a strict
+-- 'B.ByteString'.
+byteString :: B.ByteString -> B.ByteString -> Int
+byteString needle haystack = length (B.indices needle haystack)
+
+-- | Number of positions that 'BL.indices' reports for the strict @needle@ in
+-- a lazy 'BL.ByteString'.
+lazyByteString :: B.ByteString -> BL.ByteString -> Int
+lazyByteString needle haystack = length (BL.indices needle haystack)
+
+-- | Occurrence count of @needle@ in a strict 'T.Text', as given by 'T.count'.
+text :: T.Text -> T.Text -> Int
+text needle haystack = T.count needle haystack
+
+-- | Occurrence count of @needle@ in a lazy 'TL.Text', as given by 'TL.count'
+-- and converted to 'Int' with 'fromIntegral'.
+lazyText :: TL.Text -> TL.Text -> Int
+lazyText needle haystack = fromIntegral (TL.count needle haystack)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.