Commits

Jasper Van der Jeugt  committed 6f44b78

WordCount → WordFrequencies, purify and cleanup

  • Participants
  • Parent commits ac612e7

Comments (0)

Files changed (3)

File tests/benchmarks/src/Data/Text/Benchmarks.hs

 import qualified Data.Text.Benchmarks.ReadNumbers as ReadNumbers
 import qualified Data.Text.Benchmarks.Replace as Replace
 import qualified Data.Text.Benchmarks.Search as Search
-import qualified Data.Text.Benchmarks.WordCount as WordCount
+import qualified Data.Text.Benchmarks.WordFrequencies as WordFrequencies
 
 import qualified Data.Text.Benchmarks.Programs.Cut as Programs.Cut
 import qualified Data.Text.Benchmarks.Programs.Sort as Programs.Sort
         , ReadNumbers.benchmark (tf "numbers.txt")
         , Replace.benchmark (tf "russian.txt") "принимая" "своем"
         , Search.benchmark (tf "russian.txt") "принимая"
-        , WordCount.benchmark (tf "russian.txt")
+        , WordFrequencies.benchmark (tf "russian.txt")
         ]
 
     -- Program-like benchmarks

File tests/benchmarks/src/Data/Text/Benchmarks/WordCount.hs

--- | A word frequence count program
---
-module Data.Text.Benchmarks.WordCount
-    ( benchmark
-    ) where
-
-import Control.Exception (evaluate)
-import Criterion (Benchmark, bench)
-import Data.List (foldl')
-import Data.Map (Map)
-import qualified Data.Map as M
-import qualified Data.Text as T
-import qualified Data.Text.IO as T
-
-benchmark :: FilePath -> IO Benchmark
-benchmark fp = return $ bench "WordCount" $ do
-    t <- T.readFile fp
-    evaluate $ M.size $ wordCount t
-
-wordCount :: T.Text -> Map T.Text Int
-wordCount =
-    foldl' (\m k -> M.insertWith (+) k 1 m) M.empty . map T.toLower . T.words

File tests/benchmarks/src/Data/Text/Benchmarks/WordFrequencies.hs

+-- | A word frequency count using the different string types
+--
+module Data.Text.Benchmarks.WordFrequencies
+    ( benchmark
+    ) where
+
+import Criterion (Benchmark, bench, bgroup, whnf)
+import Data.Char (toLower)
+import Data.List (foldl')
+import Data.Map (Map)
+import qualified Data.ByteString.Char8 as B
+import qualified Data.Map as M
+import qualified Data.Text as T
+import qualified Data.Text.IO as T
+
+-- | Build the @WordFrequencies@ benchmark group for the file at @fp@.
+--
+-- The file is read three times before the group is constructed: as a
+-- 'String', via "Data.ByteString.Char8", and via "Data.Text.IO". Each
+-- benchmark lower-cases its input, splits it into words, and counts the
+-- words with 'frequencies'.
+--
+-- NOTE(review): Prelude's 'readFile' is lazy, so the 'String' contents are
+-- presumably only read when first forced by a benchmark run -- confirm this
+-- does not skew the "String" measurement.
+--
+-- NOTE(review): @B.map toLower@ lower-cases byte by byte; for non-ASCII
+-- input this is presumably not a real Unicode lower-casing -- confirm that
+-- is acceptable for this comparison.
+benchmark :: FilePath -> IO Benchmark
+benchmark fp = do
+    s <- readFile fp
+    b <- B.readFile fp
+    t <- T.readFile fp
+    return $ bgroup "WordFrequencies"
+        -- One entry per string type; each runs the same
+        -- lower-case -> words -> 'frequencies' pipeline under 'whnf'.
+        [ bench "String"     $ whnf (frequencies . words . map toLower)     s
+        , bench "ByteString" $ whnf (frequencies . B.words . B.map toLower) b
+        , bench "Text"       $ whnf (frequencies . T.words . T.toLower)     t
+        ]
+
+-- | Count how many times each element occurs in the given list.
+--
+-- Starting from an empty map, every element is inserted with a count of 1;
+-- if the key is already present, the new 1 is added to the existing count.
+--
+-- NOTE(review): 'foldl'' forces the map itself at each step, but with
+-- 'M.insertWith' from "Data.Map" the individual counts may remain
+-- unevaluated sums -- confirm whether that is intended for the benchmark.
+frequencies :: Ord a => [a] -> Map a Int
+frequencies = foldl' (\m k -> M.insertWith (+) k 1 m) M.empty