Commits

Jasper Van der Jeugt committed 9e797a7

StripBrackets → Programs.StripTags

  • Participants
  • Parent commits cde9760

Comments (0)

Files changed (3)

tests/benchmarks/src/Data/Text/Benchmarks.hs

 import qualified Data.Text.Benchmarks.Pure as Pure
 import qualified Data.Text.Benchmarks.ReadNumbers as ReadNumbers
 import qualified Data.Text.Benchmarks.Replace as Replace
-import qualified Data.Text.Benchmarks.StripBrackets as StripBrackets
 import qualified Data.Text.Benchmarks.WordCount as WordCount
 
 import qualified Data.Text.Benchmarks.Programs.Cut as Programs.Cut
 import qualified Data.Text.Benchmarks.Programs.Sort as Programs.Sort
+import qualified Data.Text.Benchmarks.Programs.StripTags as Programs.StripTags
 import qualified Data.Text.Benchmarks.Programs.Throughput as Programs.Throughput
 
 main :: IO ()
         , Pure.benchmark (tf "japanese.txt")
         , ReadNumbers.benchmark (tf "numbers.txt")
         , Replace.benchmark (tf "russian.txt") sink "принимая" "своем"
-        , StripBrackets.benchmark (tf "russian.txt") sink
         , WordCount.benchmark (tf "russian.txt")
         ]
 
     ps <- bgroup "Programs" `fmap` sequence
         [ Programs.Cut.benchmark (tf "russian.txt") sink 20 40
         , Programs.Sort.benchmark (tf "russian.txt") sink
+        , Programs.StripTags.benchmark (tf "yiwiki.xml") sink
         , Programs.Throughput.benchmark (tf "russian.txt") sink
         ]
 

tests/benchmarks/src/Data/Text/Benchmarks/Programs/StripTags.hs

+-- | Program to replace HTML tags by whitespace
+--
+-- This program was originally contributed by Petr Prokhorenkov.
+--
+{-# OPTIONS_GHC -fspec-constr-count=5 #-}
+module Data.Text.Benchmarks.Programs.StripTags
+    ( benchmark
+    ) where
+     
+import Criterion (Benchmark, bgroup, bench)
+import Data.List (mapAccumL)
+import System.IO (Handle, hPutStr)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as BC
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+
+benchmark :: FilePath -> Handle -> IO Benchmark
+benchmark i o = return $ bgroup "StripTags"
+    [ bench "String" $ readFile i >>= hPutStr o . string
+    , bench "ByteString" $ B.readFile i >>= B.hPutStr o . byteString
+    , bench "Text" $
+        B.readFile i >>= B.hPutStr o . T.encodeUtf8 . text . T.decodeUtf8
+    ]
+
+string :: String -> String
+string = snd . mapAccumL step 0
+
+text :: T.Text -> T.Text
+text = snd . T.mapAccumL step 0
+
+byteString :: B.ByteString -> B.ByteString
+byteString = snd . BC.mapAccumL step 0
+
+step :: Int -> Char -> (Int, Char)
+step d c
+    | d > 0 || d' > 0 = (d', ' ')
+    | otherwise       = (d', c)
+  where
+    d' = d + depth c
+    depth '>' = 1
+    depth '<' = -1
+    depth _   = 0

tests/benchmarks/src/Data/Text/Benchmarks/StripBrackets.hs

--- | Program to replace everything between brackets by spaces
---
--- This program was originally contributed by Petr Prokhorenkov.
---
-module Data.Text.Benchmarks.StripBrackets
-    ( benchmark
-    ) where
-     
-import Criterion (Benchmark, bench)
-import System.IO (Handle)
-import qualified Data.ByteString as B
-import qualified Data.Text as T
-import qualified Data.Text.Encoding as T
-
-benchmark :: FilePath -> Handle -> IO Benchmark
-benchmark fp sink = return $ bench "StripBrackets" $ do
-    t <- T.decodeUtf8 `fmap` B.readFile fp
-    B.hPutStr sink $ T.encodeUtf8 $ stripBrackets t
-
-stripBrackets :: T.Text -> T.Text
-stripBrackets = snd . T.mapAccumL f (0 :: Int)
-  where
-    f depth c =
-        let depth' = depth + d' c
-            c' | depth > 0 || depth' > 0 = ' '
-               | otherwise = c
-        in (depth', c')
-
-    d' '{' = 1
-    d' '[' = 1
-    d' '}' = -1
-    d' ']' = -1
-    d' _   = 0