-- text/tests/benchmarks/src/Data/Text/Benchmarks/Programs/StripTags.hs

-- | Program to replace HTML tags by whitespace
--
-- This program was originally contributed by Petr Prokhorenkov.
--
-- Tested in this benchmark:
--
-- * Reading the file
--
-- * Replacing text between HTML tags (<>) with whitespace
--
-- * Writing back to a handle
--
{-# OPTIONS_GHC -fspec-constr-count=5 #-}
module Data.Text.Benchmarks.Programs.StripTags
    ( benchmark
    ) where
     
import Criterion (Benchmark, bgroup, bench)
import Data.List (mapAccumL)
import System.IO (Handle, hPutStr)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as BC
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T

-- | Assemble the @StripTags@ benchmark group.
--
-- Every case reads the file at @i@, runs its contents through the
-- transformation for one string representation ('string', 'byteString' or
-- 'text'), and writes the result to the handle @o@.
benchmark :: FilePath -> Handle -> IO Benchmark
benchmark i o = return (bgroup "StripTags" cases)
  where
    cases =
        [ bench "String" viaString
        , bench "ByteString" viaByteString
        , bench "Text" viaText
        , bench "TextByteString" viaTextByteString
        ]

    viaString = do
        contents <- readFile i
        hPutStr o (string contents)

    viaByteString = do
        contents <- B.readFile i
        B.hPutStr o (byteString contents)

    viaText = do
        contents <- T.readFile i
        T.hPutStr o (text contents)

    -- Raw bytes in and out, with the transformation itself done on 'T.Text':
    -- decode from UTF-8, run 'text', then encode back to UTF-8.
    viaTextByteString = do
        raw <- B.readFile i
        B.hPutStr o (T.encodeUtf8 (text (T.decodeUtf8 raw)))

-- | Run 'step' over every character of a 'String', starting from depth 0.
--
-- Only the transformed characters are returned; the final depth is dropped.
string :: String -> String
string input = transformed
  where
    (_, transformed) = mapAccumL step 0 input

-- | Run 'step' over every character of a 'T.Text', starting from depth 0.
--
-- Only the transformed text is returned; the final depth is dropped.
text :: T.Text -> T.Text
text input = transformed
  where
    (_, transformed) = T.mapAccumL step 0 input

-- | Run 'step' over a 'B.ByteString' through the @Char8@ interface, so each
-- byte is presented to 'step' as a 'Char'. Starts from depth 0.
--
-- Only the transformed bytes are returned; the final depth is dropped.
byteString :: B.ByteString -> B.ByteString
byteString input = transformed
  where
    (_, transformed) = BC.mapAccumL step 0 input

-- | Advance the tag-stripping state by one character.
--
-- The 'Int' is the current nesting depth of angle brackets: an opening
-- bracket raises it by one and a closing bracket lowers it by one.  The
-- result pairs the new depth with the character to emit.
--
-- A character is replaced by a space when the depth is positive either
-- before or after consuming it, which covers the contents of a tag and both
-- of its brackets.  Any other character is passed through unchanged.
--
-- The depth is not clamped, so a closing bracket with no matching opening
-- bracket makes it negative.
step :: Int -> Char -> (Int, Char)
step d c
    | d > 0 || d' > 0 = (d', ' ')
    | otherwise       = (d', c)
  where
    d' = d + depth c
    -- The opening bracket must be the one that increases the depth: with the
    -- signs reversed, tags are emitted verbatim and the text following a
    -- closing bracket is blanked instead.
    depth '<' = 1
    depth '>' = -1
    depth _   = 0
-- Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
-- Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
-- Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
-- Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
-- Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
-- Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
-- Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.